@Override
protected void doStop() throws ElasticsearchException {
    final boolean setStopped = started.compareAndSet(true, false);
    assert setStopped : "service has already been stopped";
    try {
        transport.stop();
    } finally {
        // in case the transport is not connected to our local node (thus cleaned on node disconnect)
        // make sure to clean any leftover ongoing handles
        for (Map.Entry<Long, RequestHolder> entry : clientHandlers.entrySet()) {
            final RequestHolder holderToNotify = clientHandlers.remove(entry.getKey());
            if (holderToNotify != null) {
                // callback that an exception happened, but on a different thread since we don't
                // want handlers to worry about stack overflows
                threadPool.generic().execute(new Runnable() {
                    @Override
                    public void run() {
                        holderToNotify.handler().handleException(
                                new TransportException("transport stopped, action: " + holderToNotify.action()));
                    }
                });
            }
        }
    }
}
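The drain loop above leans on two properties: ConcurrentHashMap iteration tolerates concurrent removal, and failing each handler on the generic pool keeps a throwing (or deeply recursing) handler off the stopping thread's stack. Below is a minimal sketch of the same pattern using plain java.util.concurrent types; the Handler interface and the executor are illustrative stand-ins, not the Elasticsearch API.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/* Sketch only: illustrates the drain-and-notify-off-thread pattern, not the real transport code. */
class PendingRequestDrainSketch {
    interface Handler { void handleException(Exception e); }

    private final Map<Long, Handler> pending = new ConcurrentHashMap<>();
    private final ExecutorService generic = Executors.newCachedThreadPool();

    void stop() {
        // ConcurrentHashMap tolerates removal while iterating; each handler is
        // failed on the executor so a handler that throws or recurses cannot
        // unwind the stopping thread's stack
        for (Long id : pending.keySet()) {
            final Handler handler = pending.remove(id);
            if (handler != null) {
                generic.execute(() -> handler.handleException(new IllegalStateException("transport stopped")));
            }
        }
        generic.shutdown();
    }
}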
public void startRecovery(
        final IndexShard indexShard, final DiscoveryNode sourceNode, final RecoveryListener listener) {
    // create a new recovery status, and process...
    final long recoveryId =
            onGoingRecoveries.startRecovery(
                    indexShard, sourceNode, listener, recoverySettings.activityTimeout());
    threadPool.generic().execute(new RecoveryRunner(recoveryId));
}
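Only the numeric recoveryId crosses to the generic thread; the RecoveryRunner re-resolves the recovery state when it runs, so a recovery cancelled in the meantime simply finds nothing. A sketch of that id-based registry pattern, with illustrative names throughout:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;

/* Sketch only: the id-keyed registry idiom, not the real RecoveriesCollection. */
class RecoveryRegistrySketch {
    private final AtomicLong idGenerator = new AtomicLong();
    private final Map<Long, Runnable> onGoing = new ConcurrentHashMap<>();
    private final ExecutorService generic = Executors.newCachedThreadPool();

    long startRecovery(Runnable recovery) {
        final long id = idGenerator.incrementAndGet();
        onGoing.put(id, recovery);
        generic.execute(() -> {
            // look the recovery up by id; a cancel() may have removed it already
            Runnable r = onGoing.remove(id);
            if (r != null) {
                r.run();
            }
        });
        return id;
    }

    void cancel(long id) {
        onGoing.remove(id);
    }
}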
private void notifyDisconnectedFromMaster() {
    threadPool.generic().execute(new Runnable() {
        @Override
        public void run() {
            for (Listener listener : listeners) {
                listener.onDisconnectedFromMaster();
            }
        }
    });
}
private void notifyMasterFailure(final DiscoveryNode masterNode, final String reason) {
    if (notifiedMasterFailure.compareAndSet(false, true)) {
        threadPool.generic().execute(new Runnable() {
            @Override
            public void run() {
                for (Listener listener : listeners) {
                    listener.onMasterFailure(masterNode, reason);
                }
            }
        });
        stop("master failure, " + reason);
    }
}
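Both notification methods fan the event out to listeners on the generic pool; this one additionally gates on notifiedMasterFailure.compareAndSet(false, true) so concurrent failure detections produce exactly one notification. A minimal sketch of the notify-once idiom (names here are illustrative, not Elasticsearch API):

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;

/* Sketch only: one AtomicBoolean gates the listener fan-out. */
class MasterFailureNotifierSketch {
    interface Listener { void onMasterFailure(String reason); }

    private final List<Listener> listeners = new CopyOnWriteArrayList<>();
    private final AtomicBoolean notified = new AtomicBoolean();
    private final ExecutorService generic = Executors.newCachedThreadPool();

    void notifyMasterFailure(String reason) {
        // compareAndSet wins for exactly one caller; later callers see true and return
        if (notified.compareAndSet(false, true)) {
            generic.execute(() -> {
                for (Listener listener : listeners) {
                    listener.onMasterFailure(reason);
                }
            });
        }
    }
}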
@Override
public void performRequest(
        String method,
        String uri,
        Map<String, String> params,
        HttpEntity entity,
        ResponseListener listener) {
    /*
     * We use the generic thread pool here because this client is blocking, and the generic thread pool is sized appropriately
     * for some of the threads on it to be blocked, waiting on IO. It'd be a disaster if this ran on the listener thread pool,
     * eating valuable threads needed to handle responses. Most other thread pools would probably not mind running this either,
     * but the generic thread pool is the "most right" place for it to run. We could make our own thread pool for this but the
     * generic thread pool already has plenty of capacity.
     */
    threadPool.generic().execute(new AbstractRunnable() {
        @Override
        protected void doRun() throws Exception {
            try (org.elasticsearch.client.Response response =
                    restClient.performRequest(method, uri, params, entity)) {
                InputStream markSupportedInputStream =
                        new BufferedInputStream(response.getEntity().getContent());
                listener.onResponse(markSupportedInputStream);
            }
        }

        @Override
        public void onFailure(Exception t) {
            if (t instanceof ResponseException) {
                ResponseException re = (ResponseException) t;
                if (RestStatus.TOO_MANY_REQUESTS.getStatus()
                        == re.getResponse().getStatusLine().getStatusCode()) {
                    listener.onRetryableFailure(t);
                    return;
                }
            }
            listener.onFailure(t);
        }
    });
}
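The onFailure branch above classifies HTTP 429 (Too Many Requests) as retryable and everything else as terminal. A reduced sketch of that classification, with stand-in exception and listener types:

/* Sketch only: the retryable-vs-terminal split, with illustrative types. */
class FailureClassifierSketch {
    interface ResponseListener {
        void onRetryableFailure(Exception e);
        void onFailure(Exception e);
    }

    static class StatusException extends Exception {
        final int status;
        StatusException(int status) { this.status = status; }
    }

    static void classify(Exception e, ResponseListener listener) {
        // 429 means the server is shedding load; the caller may back off and retry
        if (e instanceof StatusException && ((StatusException) e).status == 429) {
            listener.onRetryableFailure(e);
            return;
        }
        listener.onFailure(e);
    }
}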
private void performStateRecovery(boolean enforceRecoverAfterTime, String reason) {
    final Gateway.GatewayStateRecoveredListener recoveryListener = new GatewayRecoveryListener();

    if (enforceRecoverAfterTime && recoverAfterTime != null) {
        if (scheduledRecovery.compareAndSet(false, true)) {
            logger.info("delaying initial state recovery for [{}]. {}", recoverAfterTime, reason);
            threadPool.schedule(
                    recoverAfterTime,
                    ThreadPool.Names.GENERIC,
                    () -> {
                        if (recovered.compareAndSet(false, true)) {
                            logger.info(
                                    "recover_after_time [{}] elapsed. performing state recovery...",
                                    recoverAfterTime);
                            gateway.performStateRecovery(recoveryListener);
                        }
                    });
        }
    } else {
        if (recovered.compareAndSet(false, true)) {
            threadPool.generic().execute(new AbstractRunnable() {
                @Override
                public void onFailure(Exception e) {
                    logger.warn("Recovery failed", e);
                    // we reset `recovered` in the listener; don't reset it here, otherwise there
                    // might be a race that resets it to false while a new recovery is already running
                    recoveryListener.onFailure("state recovery failed: " + e.getMessage());
                }

                @Override
                protected void doRun() throws Exception {
                    gateway.performStateRecovery(recoveryListener);
                }
            });
        }
    }
}
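Two independent AtomicBoolean gates are at work here: scheduledRecovery ensures the delayed recovery is scheduled at most once, and recovered ensures it runs at most once whichever path fires first. A sketch of the same gating with a plain ScheduledExecutorService (illustrative names, not the GatewayService API):

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

/* Sketch only: schedule-once plus run-once gating. */
class DelayedRecoverySketch {
    private final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
    private final AtomicBoolean scheduled = new AtomicBoolean();
    private final AtomicBoolean recovered = new AtomicBoolean();

    void maybeRecover(long delayMillis, Runnable recovery) {
        if (delayMillis > 0) {
            // first gate: only one caller ever schedules the delayed run
            if (scheduled.compareAndSet(false, true)) {
                scheduler.schedule(() -> {
                    // second gate: the recovery itself runs at most once
                    if (recovered.compareAndSet(false, true)) {
                        recovery.run();
                    }
                }, delayMillis, TimeUnit.MILLISECONDS);
            }
        } else if (recovered.compareAndSet(false, true)) {
            scheduler.execute(recovery);
        }
    }
}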
private void asyncJoinCluster() {
    if (currentJoinThread != null) {
        // we are already joining, ignore...
        logger.trace("a join thread already running");
        return;
    }
    threadPool.generic().execute(new Runnable() {
        @Override
        public void run() {
            currentJoinThread = Thread.currentThread();
            try {
                innerJoinCluster();
            } finally {
                currentJoinThread = null;
            }
        }
    });
}
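Note that the null check and the later assignment to currentJoinThread are separate steps, so this guard is race-free only if asyncJoinCluster is always invoked from a single thread. Below is a sketch of a variant that stays safe under concurrent callers by claiming the slot with an AtomicReference; it is illustrative, not the Elasticsearch implementation:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicReference;

/* Sketch only: an atomically claimed single-join-thread guard. */
class SingleJoinThreadSketch {
    private final AtomicReference<Thread> joinThread = new AtomicReference<>();
    private final ExecutorService generic = Executors.newCachedThreadPool();

    void asyncJoin(Runnable innerJoin) {
        generic.execute(() -> {
            // claim the slot atomically; a second concurrent join backs off
            if (!joinThread.compareAndSet(null, Thread.currentThread())) {
                return;
            }
            try {
                innerJoin.run();
            } finally {
                joinThread.set(null);
            }
        });
    }
}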
public void nodeIndexDeleted(final String index, final String nodeId) throws ElasticSearchException {
    DiscoveryNodes nodes = clusterService.state().nodes();
    if (nodes.localNodeMaster()) {
        threadPool.generic().execute(new Runnable() {
            @Override
            public void run() {
                innerNodeIndexDeleted(index, nodeId);
            }
        });
    } else {
        transportService.sendRequest(
                clusterService.state().nodes().masterNode(),
                NodeIndexDeletedTransportHandler.ACTION,
                new NodeIndexDeletedMessage(index, nodeId),
                EmptyTransportResponseHandler.INSTANCE_SAME);
    }
}
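The split above is the usual act-locally-or-forward shape: the elected master applies the notification itself on a background thread, while any other node sends it to the master so the change is applied in one place. A stripped-down sketch with stand-in types:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.function.Consumer;

/* Sketch only: act locally when master, otherwise forward; not the real transport plumbing. */
class MasterForwardingSketch {
    private final ExecutorService generic = Executors.newCachedThreadPool();

    void nodeIndexDeleted(String index, boolean localNodeIsMaster, Consumer<String> sendToMaster) {
        if (localNodeIsMaster) {
            // we are the master: process directly, off the calling thread
            generic.execute(() -> innerNodeIndexDeleted(index));
        } else {
            // not the master: forward so the master applies the change in one place
            sendToMaster.accept(index);
        }
    }

    private void innerNodeIndexDeleted(String index) {
        // illustrative placeholder for the real bookkeeping
    }
}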
/**
 * Submits a batch of cluster state update tasks; submitted updates are guaranteed to be processed
 * together, potentially with more tasks of the same executor.
 *
 * @param source the source of the cluster state update task
 * @param tasks a map of update tasks and their corresponding listeners
 * @param config the cluster state update task configuration
 * @param executor the cluster state update task executor; tasks that share the same executor will
 *     be executed in batches on this executor
 * @param <T> the type of the cluster state update task state
 */
public <T> void submitStateUpdateTasks(
        final String source,
        final Map<T, ClusterStateTaskListener> tasks,
        final ClusterStateTaskConfig config,
        final ClusterStateTaskExecutor<T> executor) {
    if (!lifecycle.started()) {
        return;
    }
    if (tasks.isEmpty()) {
        return;
    }
    try {
        // convert to an identity map to check for duplicates, since update tasks use
        // identity rather than equality semantics
        final IdentityHashMap<T, ClusterStateTaskListener> tasksIdentity = new IdentityHashMap<>(tasks);
        final List<UpdateTask<T>> updateTasks =
                tasksIdentity.entrySet().stream()
                        .map(
                                entry ->
                                        new UpdateTask<>(
                                                source,
                                                entry.getKey(),
                                                config,
                                                executor,
                                                safe(entry.getValue(), logger)))
                        .collect(Collectors.toList());

        synchronized (updateTasksPerExecutor) {
            List<UpdateTask> existingTasks =
                    updateTasksPerExecutor.computeIfAbsent(executor, k -> new ArrayList<>());
            for (@SuppressWarnings("unchecked") UpdateTask<T> existing : existingTasks) {
                if (tasksIdentity.containsKey(existing.task)) {
                    throw new IllegalStateException(
                            "task ["
                                    + executor.describeTasks(Collections.singletonList(existing.task))
                                    + "] with source ["
                                    + source
                                    + "] is already queued");
                }
            }
            existingTasks.addAll(updateTasks);
        }

        final UpdateTask<T> firstTask = updateTasks.get(0);

        if (config.timeout() != null) {
            updateTasksExecutor.execute(
                    firstTask,
                    threadPool.scheduler(),
                    config.timeout(),
                    () ->
                            threadPool.generic().execute(
                                    () -> {
                                        for (UpdateTask<T> task : updateTasks) {
                                            if (task.processed.getAndSet(true) == false) {
                                                logger.debug(
                                                        "cluster state update task [{}] timed out after [{}]",
                                                        source,
                                                        config.timeout());
                                                task.listener.onFailure(
                                                        source,
                                                        new ProcessClusterEventTimeoutException(
                                                                config.timeout(), source));
                                            }
                                        }
                                    }));
        } else {
            updateTasksExecutor.execute(firstTask);
        }
    } catch (EsRejectedExecutionException e) {
        // ignore cases where we are shutting down; there is really nothing interesting
        // to be done here
        if (!lifecycle.stoppedOrClosed()) {
            throw e;
        }
    }
}
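When a timeout is configured, each task and its timeout race on the task's processed flag, so at most one of "executed" and "timed out" is ever reported per task. A minimal sketch of that pattern, with plain java.util.concurrent executors standing in for updateTasksExecutor and threadPool.scheduler():

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

/* Sketch only: a task and its timeout race on one AtomicBoolean. */
class TimedTaskSketch {
    private final ExecutorService executor = Executors.newSingleThreadExecutor();
    private final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();

    void submit(Runnable task, Runnable onTimeout, long timeoutMillis) {
        final AtomicBoolean processed = new AtomicBoolean();
        scheduler.schedule(() -> {
            if (processed.compareAndSet(false, true)) {
                onTimeout.run(); // the task never started; report the timeout
            }
        }, timeoutMillis, TimeUnit.MILLISECONDS);
        executor.execute(() -> {
            if (processed.compareAndSet(false, true)) {
                task.run(); // the task won the race; the later timeout is a no-op
            }
        });
    }
}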
/** Recovers the state of the shard from the gateway. */
public void recover(final boolean indexShouldExists, final RecoveryListener listener)
        throws IndexShardGatewayRecoveryException, IgnoreGatewayRecoveryException {
    if (indexShard.state() == IndexShardState.CLOSED) {
        // got closed on us, just ignore this recovery
        listener.onIgnoreRecovery("shard closed");
        return;
    }
    if (!indexShard.routingEntry().primary()) {
        listener.onRecoveryFailed(
                new IndexShardGatewayRecoveryException(
                        shardId, "Trying to recover when the shard is in backup state", null));
        return;
    }
    try {
        if (indexShard.routingEntry().restoreSource() != null) {
            indexShard.recovering("from snapshot");
        } else {
            indexShard.recovering("from gateway");
        }
    } catch (IllegalIndexShardStateException e) {
        // that's fine, since we might be called concurrently, just ignore this; we are already
        // recovering
        listener.onIgnoreRecovery("already in recovering process, " + e.getMessage());
        return;
    }

    threadPool.generic().execute(new Runnable() {
        @Override
        public void run() {
            recoveryStatus = new RecoveryStatus();
            recoveryStatus.updateStage(RecoveryStatus.Stage.INIT);

            try {
                if (indexShard.routingEntry().restoreSource() != null) {
                    logger.debug("restoring from {} ...", indexShard.routingEntry().restoreSource());
                    snapshotService.restore(recoveryStatus);
                } else {
                    logger.debug("starting recovery from {} ...", shardGateway);
                    shardGateway.recover(indexShouldExists, recoveryStatus);
                }

                lastIndexVersion = recoveryStatus.index().version();
                lastTranslogId = -1;
                lastTranslogLength = 0;
                lastTotalTranslogOperations = recoveryStatus.translog().currentTranslogOperations();

                // start the shard if the gateway has not started it already. Note that if the gateway
                // moved the shard to POST_RECOVERY, it may have been started as well if:
                // 1) master sent a new cluster state indicating shard is initializing
                // 2) IndicesClusterStateService#applyInitializingShard will send a shard started event
                // 3) master will mark the shard as started and this will be processed locally
                IndexShardState shardState = indexShard.state();
                if (shardState != IndexShardState.POST_RECOVERY && shardState != IndexShardState.STARTED) {
                    indexShard.postRecovery("post recovery from gateway");
                }
                // refresh the shard
                indexShard.refresh(new Engine.Refresh("post_gateway").force(true));
                recoveryStatus.time(System.currentTimeMillis() - recoveryStatus.startTime());
                recoveryStatus.updateStage(RecoveryStatus.Stage.DONE);

                if (logger.isDebugEnabled()) {
                    logger.debug(
                            "recovery completed from [{}], took [{}]",
                            shardGateway,
                            timeValueMillis(recoveryStatus.time()));
                } else if (logger.isTraceEnabled()) {
                    StringBuilder sb = new StringBuilder();
                    sb.append("recovery completed from ")
                            .append(shardGateway)
                            .append(", took [")
                            .append(timeValueMillis(recoveryStatus.time()))
                            .append("]\n");
                    sb.append("    index    : files [")
                            .append(recoveryStatus.index().numberOfFiles())
                            .append("] with total_size [")
                            .append(new ByteSizeValue(recoveryStatus.index().totalSize()))
                            .append("], took[")
                            .append(TimeValue.timeValueMillis(recoveryStatus.index().time()))
                            .append("]\n");
                    sb.append("             : recovered_files [")
                            .append(recoveryStatus.index().numberOfRecoveredFiles())
                            .append("] with total_size [")
                            .append(new ByteSizeValue(recoveryStatus.index().recoveredTotalSize()))
                            .append("]\n");
                    sb.append("             : reusing_files [")
                            .append(recoveryStatus.index().numberOfReusedFiles())
                            .append("] with total_size [")
                            .append(new ByteSizeValue(recoveryStatus.index().reusedTotalSize()))
                            .append("]\n");
                    sb.append("    start    : took [")
                            .append(TimeValue.timeValueMillis(recoveryStatus.start().time()))
                            .append("], check_index [")
                            .append(timeValueMillis(recoveryStatus.start().checkIndexTime()))
                            .append("]\n");
                    sb.append("    translog : number_of_operations [")
                            .append(recoveryStatus.translog().currentTranslogOperations())
                            .append("], took [")
                            .append(TimeValue.timeValueMillis(recoveryStatus.translog().time()))
                            .append("]");
                    logger.trace(sb.toString());
                }

                listener.onRecoveryDone();
                scheduleSnapshotIfNeeded();
            } catch (IndexShardGatewayRecoveryException e) {
                if (indexShard.state() == IndexShardState.CLOSED) {
                    // got closed on us, just ignore this recovery
                    listener.onIgnoreRecovery("shard closed");
                    return;
                }
                if ((e.getCause() instanceof IndexShardClosedException)
                        || (e.getCause() instanceof IndexShardNotStartedException)) {
                    // got closed on us, just ignore this recovery
                    listener.onIgnoreRecovery("shard closed");
                    return;
                }
                listener.onRecoveryFailed(e);
            } catch (IndexShardClosedException e) {
                listener.onIgnoreRecovery("shard closed");
            } catch (IndexShardNotStartedException e) {
                listener.onIgnoreRecovery("shard closed");
            } catch (Exception e) {
                if (indexShard.state() == IndexShardState.CLOSED) {
                    // got closed on us, just ignore this recovery
                    listener.onIgnoreRecovery("shard closed");
                    return;
                }
                listener.onRecoveryFailed(
                        new IndexShardGatewayRecoveryException(shardId, "failed recovery", e));
            }
        }
    });
}
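The long catch tail reduces to one rule: if the shard went away mid-recovery (closed state, or a close-related cause), the failure is reported as ignorable; anything else is a real recovery failure. A compact sketch of that classification with illustrative stand-in types:

/* Sketch only: the recovery-outcome classification, not the real exception hierarchy. */
class RecoveryOutcomeSketch {
    interface RecoveryListener {
        void onIgnoreRecovery(String reason);
        void onRecoveryFailed(Exception e);
    }

    static class ShardClosedException extends Exception {}

    static void report(Exception e, boolean shardClosed, RecoveryListener listener) {
        // a closed shard makes any recovery error moot: nothing is left to recover
        if (shardClosed
                || e instanceof ShardClosedException
                || e.getCause() instanceof ShardClosedException) {
            listener.onIgnoreRecovery("shard closed");
            return;
        }
        listener.onRecoveryFailed(e);
    }
}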