Exemplo n.º 1
0
 /**
  * Stops the service: flips {@code started} from true to false, stops the transport, and then
  * fails every request handler that is still registered in {@code clientHandlers}.
  */
 @Override
 protected void doStop() throws ElasticsearchException {
   final boolean wasRunning = started.compareAndSet(true, false);
   assert wasRunning : "service has already been stopped";
   try {
     transport.stop();
   } finally {
     // The transport may not be connected to our local node, in which case the cleanup done on
     // node disconnect never ran; drain whatever in-flight handlers are left over.
     for (Map.Entry<Long, RequestHolder> pending : clientHandlers.entrySet()) {
       final RequestHolder orphaned = clientHandlers.remove(pending.getKey());
       if (orphaned == null) {
         // already removed by someone else
         continue;
       }
       // Report the failure from a generic-pool thread instead of inline, so handlers do not
       // have to worry about stack overflows.
       final Runnable failHandler =
           new Runnable() {
             @Override
             public void run() {
               orphaned
                   .handler()
                   .handleException(
                       new TransportException("transport stopped, action: " + orphaned.action()));
             }
           };
       threadPool.generic().execute(failHandler);
     }
   }
 }
 public void startRecovery(
     final IndexShard indexShard,
     final DiscoveryNode sourceNode,
     final RecoveryListener listener) {
   // create a new recovery status, and process...
   final long recoveryId =
       onGoingRecoveries.startRecovery(
           indexShard, sourceNode, listener, recoverySettings.activityTimeout());
   threadPool.generic().execute(new RecoveryRunner(recoveryId));
 }
 private void notifyDisconnectedFromMaster() {
   threadPool
       .generic()
       .execute(
           new Runnable() {
             @Override
             public void run() {
               for (Listener listener : listeners) {
                 listener.onDisconnectedFromMaster();
               }
             }
           });
 }
 private void notifyMasterFailure(final DiscoveryNode masterNode, final String reason) {
   if (notifiedMasterFailure.compareAndSet(false, true)) {
     threadPool
         .generic()
         .execute(
             new Runnable() {
               @Override
               public void run() {
                 for (Listener listener : listeners) {
                   listener.onMasterFailure(masterNode, reason);
                 }
               }
             });
     stop("master failure, " + reason);
   }
 }
    @Override
    public void performRequest(
        String method,
        String uri,
        Map<String, String> params,
        HttpEntity entity,
        ResponseListener listener) {
      /*
       * We use the generic thread pool here because this client is blocking the generic thread pool is sized appropriately for some
       * of the threads on it to be blocked, waiting on IO. It'd be a disaster if this ran on the listener thread pool, eating
       * valuable threads needed to handle responses. Most other thread pool would probably not mind running this either, but the
       * generic thread pool is the "most right" place for it to run. We could make our own thread pool for this but the generic
       * thread pool already has plenty of capacity.
       */
      threadPool
          .generic()
          .execute(
              new AbstractRunnable() {
                @Override
                protected void doRun() throws Exception {
                  try (org.elasticsearch.client.Response response =
                      restClient.performRequest(method, uri, params, entity)) {
                    InputStream markSupportedInputStream =
                        new BufferedInputStream(response.getEntity().getContent());
                    listener.onResponse(markSupportedInputStream);
                  }
                }

                @Override
                public void onFailure(Exception t) {
                  if (t instanceof ResponseException) {
                    ResponseException re = (ResponseException) t;
                    if (RestStatus.TOO_MANY_REQUESTS.getStatus()
                        == re.getResponse().getStatusLine().getStatusCode()) {
                      listener.onRetryableFailure(t);
                      return;
                    }
                  }
                  listener.onFailure(t);
                }
              });
    }
Exemplo n.º 6
0
  /**
   * Starts the gateway state recovery, either immediately on the generic thread pool or after
   * {@code recoverAfterTime} has elapsed.
   *
   * @param enforceRecoverAfterTime when true and {@code recoverAfterTime} is set, the recovery is
   *     scheduled instead of being run right away
   * @param reason text appended to the "delaying initial state recovery" log line
   */
  private void performStateRecovery(boolean enforceRecoverAfterTime, String reason) {
    final Gateway.GatewayStateRecoveredListener recoveryListener = new GatewayRecoveryListener();

    final boolean delayRecovery = enforceRecoverAfterTime && recoverAfterTime != null;
    if (delayRecovery) {
      if (!scheduledRecovery.compareAndSet(false, true)) {
        // a delayed recovery has already been scheduled
        return;
      }
      logger.info("delaying initial state recovery for [{}]. {}", recoverAfterTime, reason);
      threadPool.schedule(
          recoverAfterTime,
          ThreadPool.Names.GENERIC,
          () -> {
            if (!recovered.compareAndSet(false, true)) {
              return;
            }
            logger.info(
                "recover_after_time [{}] elapsed. performing state recovery...", recoverAfterTime);
            gateway.performStateRecovery(recoveryListener);
          });
      return;
    }

    if (!recovered.compareAndSet(false, true)) {
      // recovery has already been claimed by another caller
      return;
    }
    final AbstractRunnable immediateRecovery =
        new AbstractRunnable() {
          @Override
          protected void doRun() throws Exception {
            gateway.performStateRecovery(recoveryListener);
          }

          @Override
          public void onFailure(Exception e) {
            logger.warn("Recovery failed", e);
            // `recovered` is reset by the listener. Do not reset it here as well: that could
            // race with a new recovery that is already running and flip the flag back to false
            // underneath it.
            recoveryListener.onFailure("state recovery failed: " + e.getMessage());
          }
        };
    threadPool.generic().execute(immediateRecovery);
  }
Exemplo n.º 7
0
 /**
  * Runs {@code innerJoinCluster()} on the generic thread pool unless {@code currentJoinThread}
  * shows that a join is already in progress. The executing thread is recorded in {@code
  * currentJoinThread} for the duration of the join and cleared afterwards.
  */
 private void asyncJoinCluster() {
   final boolean joinInFlight = currentJoinThread != null;
   if (joinInFlight) {
     // a join is already under way, nothing to do
     logger.trace("a join thread already running");
     return;
   }
   final Runnable joinTask =
       new Runnable() {
         @Override
         public void run() {
           currentJoinThread = Thread.currentThread();
           try {
             innerJoinCluster();
           } finally {
             // always clear the marker, even when the join throws
             currentJoinThread = null;
           }
         }
       };
   threadPool.generic().execute(joinTask);
 }
 /**
  * Reports that {@code nodeId} has deleted {@code index}. When the local node is the master the
  * event is handled locally on the generic thread pool; otherwise it is sent to the current
  * master node as a {@code NodeIndexDeletedMessage}.
  *
  * @param index the name of the deleted index
  * @param nodeId the id of the node that deleted it
  */
 public void nodeIndexDeleted(final String index, final String nodeId)
     throws ElasticSearchException {
   final DiscoveryNodes currentNodes = clusterService.state().nodes();
   if (!currentNodes.localNodeMaster()) {
     // not the master: forward the notification to whoever is
     transportService.sendRequest(
         clusterService.state().nodes().masterNode(),
         NodeIndexDeletedTransportHandler.ACTION,
         new NodeIndexDeletedMessage(index, nodeId),
         EmptyTransportResponseHandler.INSTANCE_SAME);
     return;
   }
   final Runnable handleLocally =
       new Runnable() {
         @Override
         public void run() {
           innerNodeIndexDeleted(index, nodeId);
         }
       };
   threadPool.generic().execute(handleLocally);
 }
  /**
   * Submits a batch of cluster state update tasks; submitted updates are guaranteed to be
   * processed together, potentially with more tasks of the same executor.
   *
   * <p>The call returns without doing anything when the lifecycle is not started or when {@code
   * tasks} is empty. A rejection from the underlying executor is swallowed if the lifecycle is
   * stopped or closed, and rethrown otherwise.
   *
   * @param source the source of the cluster state update task
   * @param tasks a map of update tasks and their corresponding listeners
   * @param config the cluster state update task configuration
   * @param executor the cluster state update task executor; tasks that share the same executor
   *     will be executed in batches on this executor
   * @param <T> the type of the cluster state update task state
   * @throws IllegalStateException if one of the tasks is already queued for {@code executor}
   */
  public <T> void submitStateUpdateTasks(
      final String source,
      final Map<T, ClusterStateTaskListener> tasks,
      final ClusterStateTaskConfig config,
      final ClusterStateTaskExecutor<T> executor) {
    if (!lifecycle.started() || tasks.isEmpty()) {
      return;
    }
    try {
      // Update tasks are compared by identity rather than equals, so re-key the submitted tasks
      // by identity before checking for duplicates.
      final IdentityHashMap<T, ClusterStateTaskListener> tasksIdentity =
          new IdentityHashMap<>(tasks);
      final List<UpdateTask<T>> updateTasks = new ArrayList<>(tasksIdentity.size());
      for (Map.Entry<T, ClusterStateTaskListener> submitted : tasksIdentity.entrySet()) {
        final UpdateTask<T> wrapped =
            new UpdateTask<>(
                source, submitted.getKey(), config, executor, safe(submitted.getValue(), logger));
        updateTasks.add(wrapped);
      }

      synchronized (updateTasksPerExecutor) {
        List<UpdateTask> queued =
            updateTasksPerExecutor.computeIfAbsent(executor, k -> new ArrayList<>());
        // refuse the whole batch if any of its tasks is already waiting on this executor
        for (@SuppressWarnings("unchecked") UpdateTask<T> alreadyQueued : queued) {
          if (tasksIdentity.containsKey(alreadyQueued.task)) {
            final String description =
                executor.describeTasks(Collections.singletonList(alreadyQueued.task));
            throw new IllegalStateException(
                "task [" + description + "] with source [" + source + "] is already queued");
          }
        }
        queued.addAll(updateTasks);
      }

      // Only the first task is handed to the executor.
      final UpdateTask<T> firstTask = updateTasks.get(0);

      if (config.timeout() == null) {
        updateTasksExecutor.execute(firstTask);
      } else {
        updateTasksExecutor.execute(
            firstTask,
            threadPool.scheduler(),
            config.timeout(),
            () ->
                threadPool
                    .generic()
                    .execute(
                        () -> {
                          // on timeout, fail every task of the batch not yet marked processed
                          for (UpdateTask<T> pending : updateTasks) {
                            if (pending.processed.getAndSet(true)) {
                              continue;
                            }
                            logger.debug(
                                "cluster state update task [{}] timed out after [{}]",
                                source,
                                config.timeout());
                            pending.listener.onFailure(
                                source,
                                new ProcessClusterEventTimeoutException(config.timeout(), source));
                          }
                        }));
      }
    } catch (EsRejectedExecutionException e) {
      // A rejection while we are shutting down is expected and there is nothing useful to do
      // with it; in any other state it is a real problem and is propagated.
      if (!lifecycle.stoppedOrClosed()) {
        throw e;
      }
    }
  }
  /**
   * Recovers the state of the shard from the gateway, or from a snapshot when the shard's routing
   * entry carries a restore source.
   *
   * <p>Preconditions are checked on the calling thread: a closed shard or a shard that is already
   * recovering is reported through {@code listener.onIgnoreRecovery}, and a non-primary shard is
   * reported through {@code listener.onRecoveryFailed}. The recovery itself then runs on the
   * generic thread pool and its outcome is delivered to {@code listener} as well.
   *
   * @param indexShouldExists forwarded unchanged to {@code shardGateway.recover}
   * @param listener receives exactly one of done / ignored / failed for the paths visible here
   */
  public void recover(final boolean indexShouldExists, final RecoveryListener listener)
      throws IndexShardGatewayRecoveryException, IgnoreGatewayRecoveryException {
    if (indexShard.state() == IndexShardState.CLOSED) {
      // got closed on us, just ignore this recovery
      listener.onIgnoreRecovery("shard closed");
      return;
    }
    if (!indexShard.routingEntry().primary()) {
      listener.onRecoveryFailed(
          new IndexShardGatewayRecoveryException(
              shardId, "Trying to recover when the shard is in backup state", null));
      return;
    }
    try {
      if (indexShard.routingEntry().restoreSource() != null) {
        indexShard.recovering("from snapshot");
      } else {
        indexShard.recovering("from gateway");
      }
    } catch (IllegalIndexShardStateException e) {
      // that's fine, since we might be called concurrently, just ignore this, we are already
      // recovering
      listener.onIgnoreRecovery("already in recovering process, " + e.getMessage());
      return;
    }

    threadPool
        .generic()
        .execute(
            new Runnable() {
              @Override
              public void run() {
                recoveryStatus = new RecoveryStatus();
                recoveryStatus.updateStage(RecoveryStatus.Stage.INIT);

                try {
                  if (indexShard.routingEntry().restoreSource() != null) {
                    logger.debug(
                        "restoring from {} ...", indexShard.routingEntry().restoreSource());
                    snapshotService.restore(recoveryStatus);
                  } else {
                    logger.debug("starting recovery from {} ...", shardGateway);
                    shardGateway.recover(indexShouldExists, recoveryStatus);
                  }

                  // remember what was recovered; the translog markers are reset
                  lastIndexVersion = recoveryStatus.index().version();
                  lastTranslogId = -1;
                  lastTranslogLength = 0;
                  lastTotalTranslogOperations =
                      recoveryStatus.translog().currentTranslogOperations();

                  // Start the shard if the gateway has not started it already. Note that if the
                  // gateway moved the shard to POST_RECOVERY, it may have been started as well
                  // if:
                  // 1) master sent a new cluster state indicating shard is initializing
                  // 2) IndicesClusterStateService#applyInitializingShard will send a shard
                  //    started event
                  // 3) Master will mark shard as started and this will be processed locally.
                  IndexShardState shardState = indexShard.state();
                  if (shardState != IndexShardState.POST_RECOVERY
                      && shardState != IndexShardState.STARTED) {
                    indexShard.postRecovery("post recovery from gateway");
                  }
                  // refresh the shard
                  indexShard.refresh(new Engine.Refresh("post_gateway").force(true));

                  recoveryStatus.time(System.currentTimeMillis() - recoveryStatus.startTime());
                  recoveryStatus.updateStage(RecoveryStatus.Stage.DONE);

                  // Trace must be tested before debug: a logger that has trace enabled normally
                  // has debug enabled too, so testing debug first would leave the detailed
                  // summary unreachable.
                  if (logger.isTraceEnabled()) {
                    logger.trace(completedRecoveryDetails());
                  } else if (logger.isDebugEnabled()) {
                    logger.debug(
                        "recovery completed from [{}], took [{}]",
                        shardGateway,
                        timeValueMillis(recoveryStatus.time()));
                  }
                  listener.onRecoveryDone();
                  scheduleSnapshotIfNeeded();
                } catch (IndexShardGatewayRecoveryException e) {
                  if (indexShard.state() == IndexShardState.CLOSED) {
                    // got closed on us, just ignore this recovery
                    listener.onIgnoreRecovery("shard closed");
                    return;
                  }
                  if ((e.getCause() instanceof IndexShardClosedException)
                      || (e.getCause() instanceof IndexShardNotStartedException)) {
                    // got closed on us, just ignore this recovery
                    listener.onIgnoreRecovery("shard closed");
                    return;
                  }
                  listener.onRecoveryFailed(e);
                } catch (IndexShardClosedException e) {
                  listener.onIgnoreRecovery("shard closed");
                } catch (IndexShardNotStartedException e) {
                  listener.onIgnoreRecovery("shard closed");
                } catch (Exception e) {
                  if (indexShard.state() == IndexShardState.CLOSED) {
                    // got closed on us, just ignore this recovery
                    listener.onIgnoreRecovery("shard closed");
                    return;
                  }
                  // anything else is wrapped so the listener always sees a recovery exception
                  listener.onRecoveryFailed(
                      new IndexShardGatewayRecoveryException(shardId, "failed recovery", e));
                }
              }
            });
  }

  /** Builds the multi-line, per-stage summary of the completed recovery logged at trace level. */
  private String completedRecoveryDetails() {
    StringBuilder sb = new StringBuilder();
    sb.append("recovery completed from ")
        .append(shardGateway)
        .append(", took [")
        .append(timeValueMillis(recoveryStatus.time()))
        .append("]\n");
    sb.append("    index    : files           [")
        .append(recoveryStatus.index().numberOfFiles())
        .append("] with total_size [")
        .append(new ByteSizeValue(recoveryStatus.index().totalSize()))
        .append("], took[")
        .append(TimeValue.timeValueMillis(recoveryStatus.index().time()))
        .append("]\n");
    sb.append("             : recovered_files [")
        .append(recoveryStatus.index().numberOfRecoveredFiles())
        .append("] with total_size [")
        .append(new ByteSizeValue(recoveryStatus.index().recoveredTotalSize()))
        .append("]\n");
    sb.append("             : reusing_files   [")
        .append(recoveryStatus.index().numberOfReusedFiles())
        .append("] with total_size [")
        .append(new ByteSizeValue(recoveryStatus.index().reusedTotalSize()))
        .append("]\n");
    sb.append("    start    : took [")
        .append(TimeValue.timeValueMillis(recoveryStatus.start().time()))
        .append("], check_index [")
        .append(timeValueMillis(recoveryStatus.start().checkIndexTime()))
        .append("]\n");
    sb.append("    translog : number_of_operations [")
        .append(recoveryStatus.translog().currentTranslogOperations())
        .append("], took [")
        .append(TimeValue.timeValueMillis(recoveryStatus.translog().time()))
        .append("]");
    return sb.toString();
  }