/**
  * Acquires the lock of every local shard of the given index, shard by shard, sharing a single
  * timeout budget across all of them. If acquiring any shard lock fails (for example with a
  * {@link LockObtainFailedException}), every lock obtained so far is released before the failure
  * propagates to the caller.
  *
  * @param index the index to lock shards for
  * @param settings the index settings; must contain a number of shards greater than zero
  * @param lockTimeoutMS total time, in milliseconds, to wait for acquiring the shard locks
  * @return the {@link ShardLock} instances for this index, one per shard in shard-id order
  * @throws IOException if an IOException occurs.
  * @throws IllegalArgumentException if the settings do not define a positive number of shards
  */
 public List<ShardLock> lockAllForIndex(
     Index index, @IndexSettings Settings settings, long lockTimeoutMS) throws IOException {
   final Integer shardCount = settings.getAsInt(IndexMetaData.SETTING_NUMBER_OF_SHARDS, null);
   if (shardCount == null || shardCount <= 0) {
     throw new IllegalArgumentException("settings must contain a non-null > 0 number of shards");
   }
   logger.trace("locking all shards for index {} - [{}]", index, shardCount);
   final List<ShardLock> acquiredLocks = new ArrayList<>(shardCount);
   final long startedAtNanos = System.nanoTime();
   boolean allAcquired = false;
   try {
     for (int shard = 0; shard < shardCount; shard++) {
       // Each shard only gets what is left of the overall budget, never a negative timeout.
       final long elapsedMillis = TimeValue.nsecToMSec(System.nanoTime() - startedAtNanos);
       final long remainingMillis = Math.max(0, lockTimeoutMS - elapsedMillis);
       acquiredLocks.add(shardLock(new ShardId(index, shard), remainingMillis));
     }
     allAcquired = true;
     return acquiredLocks;
   } finally {
     if (!allAcquired) {
       // Something failed part-way through: give back whatever was already locked.
       logger.trace("unable to lock all shards for index {}", index);
       IOUtils.closeWhileHandlingException(acquiredLocks);
     }
   }
 }
  /**
   * Drains the tasks queued for {@code executor}, runs them as one batch against the current
   * cluster state and, when the batch produces a different state instance, publishes (if the local
   * node is the elected master) and applies that state, notifying the task listeners along the way.
   *
   * <p>Flow, as implemented below:
   *
   * <ol>
   *   <li>While holding the {@code updateTasksPerExecutor} monitor, remove the executor's pending
   *       tasks and keep only those whose {@code processed} flag was not already set.
   *   <li>Return early if there is nothing to execute, if the lifecycle is not started, or (after
   *       calling {@code onNoLongerMaster} on every task listener) if the executor only runs on
   *       the master and the local node is not the elected master.
   *   <li>Call {@code executor.execute}; an exception thrown by it is turned into a batch result
   *       that marks every task as failed with that exception.
   *   <li>Notify listeners of failed tasks via {@code onFailure}. If the resulting state is the
   *       same instance as the previous one, notify the remaining listeners and return.
   *   <li>Otherwise bump versions (master only), register ack listeners, publish (master only),
   *       store the new state in {@code clusterState}, apply cluster settings, and notify the
   *       pre-applied listeners, post-applied listeners, task listeners and finally the executor.
   * </ol>
   *
   * <p>Exceptions thrown while applying the new state are logged at warn level and not rethrown.
   *
   * @param executor the executor whose queued tasks should be run
   * @param <T> the type of task handled by the executor
   */
  <T> void runTasksForExecutor(ClusterStateTaskExecutor<T> executor) {
    final ArrayList<UpdateTask<T>> toExecute = new ArrayList<>();
    final Map<String, ArrayList<T>> processTasksBySource = new HashMap<>();
    // Claim this executor's queued tasks. The processed flag is flipped atomically so that a task
    // which was already marked as processed is skipped instead of being executed again.
    synchronized (updateTasksPerExecutor) {
      List<UpdateTask> pending = updateTasksPerExecutor.remove(executor);
      if (pending != null) {
        for (UpdateTask<T> task : pending) {
          if (task.processed.getAndSet(true) == false) {
            logger.trace("will process {}", task.toString(executor));
            toExecute.add(task);
            processTasksBySource
                .computeIfAbsent(task.source, s -> new ArrayList<>())
                .add(task.task);
          } else {
            logger.trace("skipping {}, already processed", task.toString(executor));
          }
        }
      }
    }
    if (toExecute.isEmpty()) {
      return;
    }
    // Summary used in the log messages below: one "source[task description]" entry per source
    // (just "source" when the executor's description is empty), joined with ", ".
    final String tasksSummary =
        processTasksBySource
            .entrySet()
            .stream()
            .map(
                entry -> {
                  String tasks = executor.describeTasks(entry.getValue());
                  return tasks.isEmpty() ? entry.getKey() : entry.getKey() + "[" + tasks + "]";
                })
            .reduce((s1, s2) -> s1 + ", " + s2)
            .orElse("");

    if (!lifecycle.started()) {
      logger.debug("processing [{}]: ignoring, cluster_service not started", tasksSummary);
      return;
    }
    logger.debug("processing [{}]: execute", tasksSummary);
    ClusterState previousClusterState = clusterState;
    // Master-only executors must not run when the local node is not the elected master.
    if (!previousClusterState.nodes().isLocalNodeElectedMaster() && executor.runOnlyOnMaster()) {
      logger.debug("failing [{}]: local node is no longer master", tasksSummary);
      toExecute.stream().forEach(task -> task.listener.onNoLongerMaster(task.source));
      return;
    }
    ClusterStateTaskExecutor.BatchResult<T> batchResult;
    long startTimeNS = currentTimeInNanos();
    try {
      List<T> inputs =
          toExecute.stream().map(tUpdateTask -> tUpdateTask.task).collect(Collectors.toList());
      batchResult = executor.execute(previousClusterState, inputs);
    } catch (Exception e) {
      TimeValue executionTime =
          TimeValue.timeValueMillis(
              Math.max(0, TimeValue.nsecToMSec(currentTimeInNanos() - startTimeNS)));
      if (logger.isTraceEnabled()) {
        logger.trace(
            (Supplier<?>)
                () ->
                    new ParameterizedMessage(
                        "failed to execute cluster state update in [{}], state:\nversion [{}], source [{}]\n{}{}{}",
                        executionTime,
                        previousClusterState.version(),
                        tasksSummary,
                        previousClusterState.nodes().prettyPrint(),
                        previousClusterState.routingTable().prettyPrint(),
                        previousClusterState.getRoutingNodes().prettyPrint()),
            e);
      }
      warnAboutSlowTaskIfNeeded(executionTime, tasksSummary);
      // The executor itself blew up: fail every task in the batch with that exception and keep
      // the previous state as the resulting state.
      batchResult =
          ClusterStateTaskExecutor.BatchResult.<T>builder()
              .failures(toExecute.stream().map(updateTask -> updateTask.task)::iterator, e)
              .build(previousClusterState);
    }

    // Sanity checks: the executor must report exactly one result per submitted task.
    assert batchResult.executionResults != null;
    assert batchResult.executionResults.size() == toExecute.size()
        : String.format(
            Locale.ROOT,
            "expected [%d] task result%s but was [%d]",
            toExecute.size(),
            toExecute.size() == 1 ? "" : "s",
            batchResult.executionResults.size());
    // The assignment inside the assert is only evaluated when assertions are enabled, so the
    // per-task loop below is skipped entirely otherwise.
    boolean assertsEnabled = false;
    assert (assertsEnabled = true);
    if (assertsEnabled) {
      for (UpdateTask<T> updateTask : toExecute) {
        assert batchResult.executionResults.containsKey(updateTask.task)
            : "missing task result for " + updateTask.toString(executor);
      }
    }

    ClusterState newClusterState = batchResult.resultingState;
    final ArrayList<UpdateTask<T>> proccessedListeners = new ArrayList<>();
    // fail all tasks that have failed and extract those that are waiting for results
    for (UpdateTask<T> updateTask : toExecute) {
      assert batchResult.executionResults.containsKey(updateTask.task)
          : "missing " + updateTask.toString(executor);
      final ClusterStateTaskExecutor.TaskResult executionResult =
          batchResult.executionResults.get(updateTask.task);
      executionResult.handle(
          () -> proccessedListeners.add(updateTask),
          ex -> {
            logger.debug(
                (Supplier<?>)
                    () ->
                        new ParameterizedMessage(
                            "cluster state update task {} failed", updateTask.toString(executor)),
                ex);
            updateTask.listener.onFailure(updateTask.source, ex);
          });
    }

    // The executor returned the very same state instance: nothing to publish or apply, so the
    // successful tasks are acknowledged and notified right away.
    if (previousClusterState == newClusterState) {
      for (UpdateTask<T> task : proccessedListeners) {
        if (task.listener instanceof AckedClusterStateTaskListener) {
          // no need to wait for ack if nothing changed, the update can be counted as acknowledged
          ((AckedClusterStateTaskListener) task.listener).onAllNodesAcked(null);
        }
        task.listener.clusterStateProcessed(task.source, previousClusterState, newClusterState);
      }
      TimeValue executionTime =
          TimeValue.timeValueMillis(
              Math.max(0, TimeValue.nsecToMSec(currentTimeInNanos() - startTimeNS)));
      logger.debug(
          "processing [{}]: took [{}] no change in cluster_state", tasksSummary, executionTime);
      warnAboutSlowTaskIfNeeded(executionTime, tasksSummary);
      return;
    }

    try {
      ArrayList<Discovery.AckListener> ackListeners = new ArrayList<>();
      if (newClusterState.nodes().isLocalNodeElectedMaster()) {
        // only the master controls the version numbers
        Builder builder = ClusterState.builder(newClusterState).incrementVersion();
        // The routing table and metadata versions are only bumped when the executor returned a
        // different instance for them.
        if (previousClusterState.routingTable() != newClusterState.routingTable()) {
          builder.routingTable(
              RoutingTable.builder(newClusterState.routingTable())
                  .version(newClusterState.routingTable().version() + 1)
                  .build());
        }
        if (previousClusterState.metaData() != newClusterState.metaData()) {
          builder.metaData(
              MetaData.builder(newClusterState.metaData())
                  .version(newClusterState.metaData().version() + 1));
        }
        newClusterState = builder.build();
        for (UpdateTask<T> task : proccessedListeners) {
          if (task.listener instanceof AckedClusterStateTaskListener) {
            final AckedClusterStateTaskListener ackedListener =
                (AckedClusterStateTaskListener) task.listener;
            // A missing or zero ack timeout means there is nothing to wait for: report the
            // timeout immediately instead of registering a countdown listener.
            if (ackedListener.ackTimeout() == null || ackedListener.ackTimeout().millis() == 0) {
              ackedListener.onAckTimeout();
            } else {
              try {
                ackListeners.add(
                    new AckCountDownListener(
                        ackedListener,
                        newClusterState.version(),
                        newClusterState.nodes(),
                        threadPool));
              } catch (EsRejectedExecutionException ex) {
                if (logger.isDebugEnabled()) {
                  logger.debug(
                      "Couldn't schedule timeout thread - node might be shutting down", ex);
                }
                // timeout straightaway, otherwise we could wait forever as the timeout thread has
                // not started
                ackedListener.onAckTimeout();
              }
            }
          }
        }
      }
      // Single listener handed to the publisher that wraps all per-task ack listeners (the list is
      // empty when the local node is not the elected master).
      final Discovery.AckListener ackListener = new DelegetingAckListener(ackListeners);

      newClusterState.status(ClusterState.ClusterStateStatus.BEING_APPLIED);

      if (logger.isTraceEnabled()) {
        logger.trace(
            "cluster state updated, source [{}]\n{}", tasksSummary, newClusterState.prettyPrint());
      } else if (logger.isDebugEnabled()) {
        logger.debug(
            "cluster state updated, version [{}], source [{}]",
            newClusterState.version(),
            tasksSummary);
      }

      ClusterChangedEvent clusterChangedEvent =
          new ClusterChangedEvent(tasksSummary, newClusterState, previousClusterState);
      // new cluster state, notify all listeners
      final DiscoveryNodes.Delta nodesDelta = clusterChangedEvent.nodesDelta();
      if (nodesDelta.hasChanges() && logger.isInfoEnabled()) {
        String summary = nodesDelta.shortSummary();
        if (summary.length() > 0) {
          logger.info("{}, reason: {}", summary, tasksSummary);
        }
      }

      nodeConnectionsService.connectToAddedNodes(clusterChangedEvent);

      // if we are the master, publish the new state to all nodes
      // we publish here before we send a notification to all the listeners, since if it fails
      // we don't want to notify
      if (newClusterState.nodes().isLocalNodeElectedMaster()) {
        logger.debug("publishing cluster state version [{}]", newClusterState.version());
        try {
          clusterStatePublisher.accept(clusterChangedEvent, ackListener);
        } catch (Discovery.FailedToCommitClusterStateException t) {
          final long version = newClusterState.version();
          logger.warn(
              (Supplier<?>)
                  () ->
                      new ParameterizedMessage(
                          "failing [{}]: failed to commit cluster state version [{}]",
                          tasksSummary,
                          version),
              t);
          // The state was not committed: fail the otherwise successful tasks and return without
          // updating the local cluster state.
          proccessedListeners.forEach(task -> task.listener.onFailure(task.source, t));
          return;
        }
      }

      // update the current cluster state
      clusterState = newClusterState;
      logger.debug("set local cluster state to version {}", newClusterState.version());
      try {
        // nothing to do until we actually recover from the gateway or any other block indicates we
        // need to disable persistency
        if (clusterChangedEvent.state().blocks().disableStatePersistence() == false
            && clusterChangedEvent.metaDataChanged()) {
          final Settings incomingSettings = clusterChangedEvent.state().metaData().settings();
          clusterSettings.applySettings(incomingSettings);
        }
      } catch (Exception ex) {
        logger.warn("failed to apply cluster settings", ex);
      }
      // A failing listener is logged and does not prevent the remaining listeners from running.
      for (ClusterStateListener listener : preAppliedListeners) {
        try {
          listener.clusterChanged(clusterChangedEvent);
        } catch (Exception ex) {
          logger.warn("failed to notify ClusterStateListener", ex);
        }
      }

      nodeConnectionsService.disconnectFromRemovedNodes(clusterChangedEvent);

      newClusterState.status(ClusterState.ClusterStateStatus.APPLIED);

      for (ClusterStateListener listener : postAppliedListeners) {
        try {
          listener.clusterChanged(clusterChangedEvent);
        } catch (Exception ex) {
          logger.warn("failed to notify ClusterStateListener", ex);
        }
      }

      // manual ack only from the master at the end of the publish
      if (newClusterState.nodes().isLocalNodeElectedMaster()) {
        try {
          ackListener.onNodeAck(newClusterState.nodes().getLocalNode(), null);
        } catch (Exception e) {
          final DiscoveryNode localNode = newClusterState.nodes().getLocalNode();
          logger.debug(
              (Supplier<?>)
                  () ->
                      new ParameterizedMessage(
                          "error while processing ack for master node [{}]", localNode),
              e);
        }
      }

      // Tell the successful tasks that the new state has been processed locally.
      for (UpdateTask<T> task : proccessedListeners) {
        task.listener.clusterStateProcessed(task.source, previousClusterState, newClusterState);
      }

      try {
        executor.clusterStatePublished(clusterChangedEvent);
      } catch (Exception e) {
        logger.error(
            (Supplier<?>)
                () ->
                    new ParameterizedMessage(
                        "exception thrown while notifying executor of new cluster state publication [{}]",
                        tasksSummary),
            e);
      }

      TimeValue executionTime =
          TimeValue.timeValueMillis(
              Math.max(0, TimeValue.nsecToMSec(currentTimeInNanos() - startTimeNS)));
      logger.debug(
          "processing [{}]: took [{}] done applying updated cluster_state (version: {}, uuid: {})",
          tasksSummary,
          executionTime,
          newClusterState.version(),
          newClusterState.stateUUID());
      warnAboutSlowTaskIfNeeded(executionTime, tasksSummary);
    } catch (Exception e) {
      // Any other failure while publishing or applying the state is only logged here; the task
      // listeners are not notified of it (see the TODO below).
      TimeValue executionTime =
          TimeValue.timeValueMillis(
              Math.max(0, TimeValue.nsecToMSec(currentTimeInNanos() - startTimeNS)));
      final long version = newClusterState.version();
      final String stateUUID = newClusterState.stateUUID();
      final String prettyPrint = newClusterState.prettyPrint();
      logger.warn(
          (Supplier<?>)
              () ->
                  new ParameterizedMessage(
                      "failed to apply updated cluster state in [{}]:\nversion [{}], uuid [{}], source [{}]\n{}",
                      executionTime,
                      version,
                      stateUUID,
                      tasksSummary,
                      prettyPrint),
          e);
      // TODO: do we want to call updateTask.onFailure here?
    }
  }
// Example #3
// 0
  /**
   * Translates an update request into the operation that should actually be executed: an index
   * request (for an upsert or an update), a delete request, or a ready-made update response when
   * nothing has to be written.
   *
   * @param shardId the shard id used when building responses and exceptions
   * @param request the update request to translate
   * @param getResult the result of fetching the current version of the document
   * @return the prepared operation together with the updated source and its content type
   * @throws DocumentMissingException if the document does not exist and the request defines
   *     neither an upsert request nor doc-as-upsert
   * @throws DocumentSourceMissingException if the existing document has no source reference
   */
  @SuppressWarnings("unchecked")
  protected Result prepare(ShardId shardId, UpdateRequest request, final GetResult getResult) {
    final long fetchTimeNanos = System.nanoTime();

    if (getResult.isExists() == false) {
      // The document is missing, so only an upsert can succeed.
      if (request.upsertRequest() == null && request.docAsUpsert() == false) {
        throw new DocumentMissingException(shardId, request.type(), request.id());
      }
      final IndexRequest upsertIndexRequest;
      if (request.docAsUpsert()) {
        upsertIndexRequest = request.doc();
      } else {
        upsertIndexRequest = request.upsertRequest();
      }
      TimeValue upsertTtl = upsertIndexRequest.ttl();
      if (request.scriptedUpsert() && request.script() != null) {
        // Let the script implement the create logic on top of the upsert document.
        final IndexRequest scriptedUpsertRequest = request.upsertRequest();
        final Map<String, Object> upsertSource = scriptedUpsertRequest.sourceAsMap();
        Map<String, Object> scriptContext = new HashMap<>(2);
        // "op" = "create" signals to the script that this is a create, not an update.
        scriptContext.put("op", "create");
        scriptContext.put("_source", upsertSource);
        scriptContext = executeScript(request.script, scriptContext);
        // A TTL set by the script through ctx._ttl is only used when the request has none.
        if (upsertTtl == null) {
          upsertTtl = getTTLFromScriptContext(scriptContext);
        }

        // The script may only keep "create" (the default) or switch to "none" to abort the
        // upsert; anything else is reported and treated like "none".
        final String chosenOp = (String) scriptContext.get("op");
        if ("create".equals(chosenOp) == false) {
          if ("none".equals(chosenOp) == false) {
            logger.warn(
                "Used upsert operation [{}] for script [{}], doing nothing...",
                chosenOp,
                request.script.getScript());
          }
          final UpdateResponse abortedUpsert =
              new UpdateResponse(
                  shardId, getResult.getType(), getResult.getId(), getResult.getVersion(), false);
          abortedUpsert.setGetResult(getResult);
          return new Result(abortedUpsert, Operation.NONE, upsertSource, XContentType.JSON);
        }
        upsertIndexRequest.source((Map) scriptContext.get("_source"));
      }

      upsertIndexRequest
          .index(request.index())
          .type(request.type())
          .id(request.id())
          // an upsert of a missing document must always be a create
          .create(true)
          .ttl(upsertTtl)
          .refresh(request.refresh())
          .routing(request.routing())
          .parent(request.parent())
          .consistencyLevel(request.consistencyLevel());
      if (request.versionType() != VersionType.INTERNAL) {
        // Outside of internal versioning the new document is created with the version supplied
        // by the request.
        upsertIndexRequest.version(request.version()).versionType(request.versionType());
      }
      return new Result(upsertIndexRequest, Operation.UPSERT, null, null);
    }

    long versionForUpdate = getResult.getVersion();
    if (request.versionType() != VersionType.INTERNAL) {
      assert request.versionType() == VersionType.FORCE;
      // match_any is already excluded by the conflict test
      versionForUpdate = request.version();
    }

    if (getResult.internalSourceRef() == null) {
      // Without a source there is nothing to update, so fail the request.
      throw new DocumentSourceMissingException(shardId, request.type(), request.id());
    }

    final Tuple<XContentType, Map<String, Object>> parsedSource =
        XContentHelper.convertToMap(getResult.internalSourceRef(), true);
    final XContentType sourceContentType = parsedSource.v1();
    final Map<String, Object> newSource;
    String chosenOperation = null;
    String newTimestamp = null;
    TimeValue newTtl = null;

    // Start from the routing and parent stored with the existing document, if any.
    String effectiveRouting = null;
    if (getResult.getFields().containsKey(RoutingFieldMapper.NAME)) {
      effectiveRouting = getResult.field(RoutingFieldMapper.NAME).getValue().toString();
    }
    String effectiveParent = null;
    if (getResult.getFields().containsKey(ParentFieldMapper.NAME)) {
      effectiveParent = getResult.field(ParentFieldMapper.NAME).getValue().toString();
    }

    if (request.script() == null && request.doc() != null) {
      // Partial-document update: merge the supplied doc into the existing source.
      final IndexRequest partialDoc = request.doc();
      newSource = parsedSource.v2();
      if (partialDoc.ttl() != null) {
        newTtl = partialDoc.ttl();
      }
      newTimestamp = partialDoc.timestamp();
      if (partialDoc.routing() != null) {
        effectiveRouting = partialDoc.routing();
      }
      if (partialDoc.parent() != null) {
        effectiveParent = partialDoc.parent();
      }
      final boolean sourceChanged =
          XContentHelper.update(newSource, partialDoc.sourceAsMap(), request.detectNoop());
      // update() can report "unchanged" even when detectNoop is off, because it treats empty maps
      // as noops. The update is only turned into a noop when detectNoop is on, to preserve
      // backwards compatibility and to support users repopulating multi-fields, adding synonyms,
      // etc.
      if (request.detectNoop() && sourceChanged == false) {
        chosenOperation = "none";
      }
    } else {
      // Scripted update: expose the current document to the script through the context map.
      Map<String, Object> scriptContext = new HashMap<>(16);
      Long storedTtl = null;
      if (getResult.getFields().containsKey(TTLFieldMapper.NAME)) {
        storedTtl = (Long) getResult.field(TTLFieldMapper.NAME).getValue();
      }
      Long storedTimestamp = null;
      if (getResult.getFields().containsKey(TimestampFieldMapper.NAME)) {
        storedTimestamp = (Long) getResult.field(TimestampFieldMapper.NAME).getValue();
      }
      scriptContext.put("_index", getResult.getIndex());
      scriptContext.put("_type", getResult.getType());
      scriptContext.put("_id", getResult.getId());
      scriptContext.put("_version", getResult.getVersion());
      scriptContext.put("_routing", effectiveRouting);
      scriptContext.put("_parent", effectiveParent);
      scriptContext.put("_timestamp", storedTimestamp);
      scriptContext.put("_ttl", storedTtl);
      scriptContext.put("_source", parsedSource.v2());

      scriptContext = executeScript(request.script, scriptContext);

      chosenOperation = (String) scriptContext.get("op");

      final Object scriptTimestamp = scriptContext.get("_timestamp");
      if (scriptTimestamp != null) {
        newTimestamp = scriptTimestamp.toString();
      } else if (storedTimestamp != null) {
        // The script left no timestamp in the context, so the stored one is kept.
        newTimestamp = storedTimestamp.toString();
      }

      newTtl = getTTLFromScriptContext(scriptContext);

      newSource = (Map<String, Object>) scriptContext.get("_source");
    }

    // Neither the doc nor the script supplied a TTL: fall back to the stored TTL, if any.
    if (newTtl == null) {
      Long remainingStoredTtl = null;
      if (getResult.getFields().containsKey(TTLFieldMapper.NAME)) {
        remainingStoredTtl = (Long) getResult.field(TTLFieldMapper.NAME).getValue();
      }
      if (remainingStoredTtl != null) {
        // Approximation of the exact TTL: subtract the time spent since this method started.
        final long elapsedMillis = TimeValue.nsecToMSec(System.nanoTime() - fetchTimeNanos);
        newTtl = new TimeValue(remainingStoredTtl - elapsedMillis);
      }
    }

    if (chosenOperation == null || "index".equals(chosenOperation)) {
      final IndexRequest updateIndexRequest =
          Requests.indexRequest(request.index())
              .type(request.type())
              .id(request.id())
              .routing(effectiveRouting)
              .parent(effectiveParent)
              .source(newSource, sourceContentType)
              .version(versionForUpdate)
              .versionType(request.versionType())
              .consistencyLevel(request.consistencyLevel())
              .timestamp(newTimestamp)
              .ttl(newTtl)
              .refresh(request.refresh());
      return new Result(updateIndexRequest, Operation.INDEX, newSource, sourceContentType);
    }

    if ("delete".equals(chosenOperation)) {
      final DeleteRequest deleteRequest =
          Requests.deleteRequest(request.index())
              .type(request.type())
              .id(request.id())
              .routing(effectiveRouting)
              .parent(effectiveParent)
              .version(versionForUpdate)
              .versionType(request.versionType())
              .consistencyLevel(request.consistencyLevel());
      return new Result(deleteRequest, Operation.DELETE, newSource, sourceContentType);
    }

    if ("none".equals(chosenOperation)) {
      final UpdateResponse noopResponse =
          new UpdateResponse(
              shardId, getResult.getType(), getResult.getId(), getResult.getVersion(), false);
      noopResponse.setGetResult(
          extractGetResult(
              request,
              request.index(),
              getResult.getVersion(),
              newSource,
              sourceContentType,
              getResult.internalSourceRef()));
      return new Result(noopResponse, Operation.NONE, newSource, sourceContentType);
    }

    // Unknown operation chosen by the script: report it and do nothing.
    logger.warn(
        "Used update operation [{}] for script [{}], doing nothing...",
        chosenOperation,
        request.script.getScript());
    final UpdateResponse ignoredResponse =
        new UpdateResponse(
            shardId, getResult.getType(), getResult.getId(), getResult.getVersion(), false);
    return new Result(ignoredResponse, Operation.NONE, newSource, sourceContentType);
  }