public void start() {
  if (expectedSuccessfulOps == 0) {
    // no search shards to search on, bail with an empty response
    // (this happens when searching across _all with no indices around, and is consistent with
    // broadcast operations)
    listener.onResponse(
        new SearchResponse(
            InternalSearchResponse.empty(),
            null,
            0,
            0,
            buildTookInMillis(),
            ShardSearchFailure.EMPTY_ARRAY));
    return;
  }
  int shardIndex = -1;
  for (final ShardIterator shardIt : shardsIts) {
    shardIndex++;
    final ShardRouting shard = shardIt.nextOrNull();
    if (shard != null) {
      performFirstPhase(shardIndex, shardIt, shard);
    } else {
      // really, no shards active in this group
      onFirstPhaseResult(
          shardIndex, null, null, shardIt, new NoShardAvailableActionException(shardIt.shardId()));
    }
  }
}
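Almost every example on this page shares one idiom: take the next candidate copy of a shard group with nextOrNull(), act on it, and fall back to the remaining copies on failure until the iterator is drained. A minimal sketch of that idiom, assuming a caller-supplied tryShard callback (hypothetical name; ShardIterator and ShardRouting are the Elasticsearch types used throughout these examples):

import java.util.function.Predicate;
import org.elasticsearch.cluster.routing.ShardIterator;
import org.elasticsearch.cluster.routing.ShardRouting;

final class ShardFailover {
  // Drains the iterator one copy at a time; `tryShard` is a hypothetical
  // stand-in for performFirstPhase/performOperation in the examples above.
  static boolean onFirstAvailableCopy(ShardIterator shardIt, Predicate<ShardRouting> tryShard) {
    ShardRouting shard;
    while ((shard = shardIt.nextOrNull()) != null) {
      if (tryShard.test(shard)) {
        return true; // one copy handled the operation
      }
      // otherwise fall through to the next copy of the same replication group
    }
    return false; // exhausted: callers typically raise NoShardAvailableActionException here
  }
}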
/** start sending current requests to replicas */
@Override
protected void doRun() {
  if (pending.get() == 0) {
    doFinish();
    return;
  }
  ShardRouting shard;
  shardIt.reset(); // reset the iterator
  while ((shard = shardIt.nextOrNull()) != null) {
    // if it's unassigned, there is nothing to do here...
    if (shard.unassigned()) {
      continue;
    }
    // we also index on a replica that is still initializing, since we might not have received
    // the event that it was started yet. We will get an IllegalIndexShardStateException if it's
    // not started, and that's fine - we will ignore it.
    if (shard.primary()) {
      if (originalPrimaryShard.currentNodeId().equals(shard.currentNodeId()) == false) {
        // there is a new primary, we'll have to replicate to it.
        performOnReplica(shard, shard.currentNodeId());
      }
      if (shard.relocating()) {
        performOnReplica(shard, shard.relocatingNodeId());
      }
    } else if (shouldExecuteReplication(indexMetaData.getSettings())) {
      performOnReplica(shard, shard.currentNodeId());
      if (shard.relocating()) {
        performOnReplica(shard, shard.relocatingNodeId());
      }
    }
  }
}
@Override
protected ShardIterator shards(ClusterState clusterState, UpdateRequest request)
    throws ElasticSearchException {
  if (request.shardId() != -1) {
    return clusterState.routingTable().index(request.index()).shard(request.shardId()).primaryShardIt();
  }
  ShardIterator shardIterator =
      clusterService
          .operationRouting()
          .indexShards(
              clusterService.state(), request.index(), request.type(), request.id(), request.routing());
  ShardRouting shard;
  while ((shard = shardIterator.nextOrNull()) != null) {
    if (shard.primary()) {
      return new PlainShardIterator(shardIterator.shardId(), ImmutableList.of(shard));
    }
  }
  return new PlainShardIterator(shardIterator.shardId(), ImmutableList.<ShardRouting>of());
}
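The pattern above narrows a full routing group down to just the primary copy by wrapping it in a PlainShardIterator. A minimal sketch of the same narrowing as a standalone helper, assuming the PlainShardIterator(ShardId, List<ShardRouting>) constructor used above and plain JDK collections instead of Guava:

import java.util.Collections;
import org.elasticsearch.cluster.routing.PlainShardIterator;
import org.elasticsearch.cluster.routing.ShardIterator;
import org.elasticsearch.cluster.routing.ShardRouting;

final class PrimaryOnly {
  // Returns an iterator over just the primary copy, or an empty iterator if no
  // primary is present in the group (mirrors the update-action example above).
  static ShardIterator primaryOnly(ShardIterator group) {
    ShardRouting shard;
    while ((shard = group.nextOrNull()) != null) {
      if (shard.primary()) {
        return new PlainShardIterator(group.shardId(), Collections.singletonList(shard));
      }
    }
    return new PlainShardIterator(group.shardId(), Collections.<ShardRouting>emptyList());
  }
}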
public void start() {
  if (shardsIts.size() == 0) {
    // no shards
    try {
      listener.onResponse(newResponse(request, new AtomicReferenceArray(0), clusterState));
    } catch (Throwable e) {
      listener.onFailure(e);
    }
    return;
  }
  request.beforeStart();
  // iterate over the shard groups and perform the operation on the first available copy of each
  int shardIndex = -1;
  for (final ShardIterator shardIt : shardsIts) {
    shardIndex++;
    final ShardRouting shard = shardIt.nextOrNull();
    if (shard != null) {
      performOperation(shardIt, shard, shardIndex);
    } else {
      // really, no shards active in this group
      onOperation(null, shardIt, shardIndex, new NoShardAvailableActionException(shardIt.shardId()));
    }
  }
}
@Override
protected void doRun() {
  if (checkBlocks() == false) {
    return;
  }
  final ShardIterator shardIt = shards(observer.observedState(), internalRequest);
  final ShardRouting primary = resolvePrimary(shardIt);
  if (primary == null) {
    retryBecauseUnavailable(shardIt.shardId(), "No active shards.");
    return;
  }
  if (primary.active() == false) {
    logger.trace("primary shard [{}] is not yet active, scheduling a retry.", primary.shardId());
    retryBecauseUnavailable(
        shardIt.shardId(), "Primary shard is not active or isn't assigned to a known node.");
    return;
  }
  if (observer.observedState().nodes().nodeExists(primary.currentNodeId()) == false) {
    logger.trace(
        "primary shard [{}] is assigned to an unknown node [{}], scheduling a retry.",
        primary.shardId(),
        primary.currentNodeId());
    retryBecauseUnavailable(
        shardIt.shardId(), "Primary shard is not active or isn't assigned to a known node.");
    return;
  }
  routeRequestOrPerformLocally(primary, shardIt);
}
private void mockShard(OperationRouting operationRouting, Integer shardId) {
  ShardIterator shardIterator = mock(ShardIterator.class);
  when(operationRouting.indexShards(
          any(ClusterState.class),
          anyString(),
          anyString(),
          Matchers.eq(shardId.toString()),
          anyString()))
      .thenReturn(shardIterator);
  when(shardIterator.shardId()).thenReturn(new ShardId("foo", shardId));
}
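A hedged usage sketch for the helper above - a hypothetical test body, assuming the mockShard helper is in scope together with Mockito's static mock() import:

// Stub routing for shards 0 and 1 so code under test resolves a predictable
// ShardId without a real cluster state.
OperationRouting operationRouting = mock(OperationRouting.class);
mockShard(operationRouting, 0);
mockShard(operationRouting, 1);
// indexShards(...) calls whose id argument is "0" now return an iterator whose
// shardId() is ShardId("foo", 0); unstubbed shard ids fall back to Mockito's
// default answer (null) and would need their own stubbing.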
@Override
protected ShardIterator shards(ClusterState clusterState, ShardDeleteRequest request) {
  GroupShardsIterator group =
      clusterService.operationRouting().broadcastDeleteShards(clusterService.state(), request.index());
  for (ShardIterator shardIt : group) {
    if (shardIt.shardId().id() == request.shardId()) {
      return shardIt;
    }
  }
  throw new ElasticsearchIllegalStateException(
      "No shards iterator found for shard [" + request.shardId() + "]");
}
void retry(@Nullable final Throwable failure) {
  if (observer.isTimedOut()) {
    // we are running as a last attempt after a timeout has happened. don't retry
    Throwable listenFailure = failure;
    if (listenFailure == null) {
      if (shardIt == null) {
        listenFailure =
            new UnavailableShardsException(
                request.concreteIndex(),
                -1,
                "Timeout waiting for [{}], request: {}",
                request.timeout(),
                actionName);
      } else {
        listenFailure =
            new UnavailableShardsException(
                shardIt.shardId(),
                "[{}] shardIt, [{}] active : Timeout waiting for [{}], request: {}",
                shardIt.size(),
                shardIt.sizeActive(),
                request.timeout(),
                actionName);
      }
    }
    listener.onFailure(listenFailure);
    return;
  }
  observer.waitForNextChange(
      new ClusterStateObserver.Listener() {
        @Override
        public void onNewClusterState(ClusterState state) {
          doStart();
        }

        @Override
        public void onClusterServiceClose() {
          listener.onFailure(new NodeClosedException(nodes.getLocalNode()));
        }

        @Override
        public void onTimeout(TimeValue timeout) {
          // just to be on the safe side, see if we can start it now?
          doStart();
        }
      },
      request.timeout());
}
void setFailure(ShardIterator shardIt, int shardIndex, Throwable t) {
  // we don't aggregate shard failures on non active shards (but we do keep the header counts
  // right)
  if (TransportActions.isShardNotAvailableException(t)) {
    return;
  }
  if (!(t instanceof BroadcastShardOperationFailedException)) {
    t = new BroadcastShardOperationFailedException(shardIt.shardId(), t);
  }
  Object response = shardsResponses.get(shardIndex);
  if (response == null) {
    // no failure recorded yet, just set it and return
    shardsResponses.set(shardIndex, t);
    return;
  }
  if (!(response instanceof Throwable)) {
    // a successful response is already present; never override it with a failure
    return;
  }
  // a failure is already present; try not to override it with an exception that is less
  // meaningful (for example, an illegal shard state)
  if (TransportActions.isReadOverrideException(t)) {
    shardsResponses.set(shardIndex, t);
  }
}
private void doFinish() {
  if (finished.compareAndSet(false, true)) {
    Releasables.close(indexShardReference);
    final ShardId shardId = shardIt.shardId();
    final ActionWriteResponse.ShardInfo.Failure[] failuresArray;
    if (!shardReplicaFailures.isEmpty()) {
      int slot = 0;
      failuresArray = new ActionWriteResponse.ShardInfo.Failure[shardReplicaFailures.size()];
      for (Map.Entry<String, Throwable> entry : shardReplicaFailures.entrySet()) {
        RestStatus restStatus = ExceptionsHelper.status(entry.getValue());
        failuresArray[slot++] =
            new ActionWriteResponse.ShardInfo.Failure(
                shardId.getIndex(),
                shardId.getId(),
                entry.getKey(),
                entry.getValue(),
                restStatus,
                false);
      }
    } else {
      failuresArray = ActionWriteResponse.EMPTY;
    }
    finalResponse.setShardInfo(
        new ActionWriteResponse.ShardInfo(totalShards, success.get(), failuresArray));
    listener.onResponse(finalResponse);
  }
}
@Override
public void onFailure(Throwable t) {
  logger.error(
      "unexpected error while replicating for action [{}]. shard [{}].",
      t,
      actionName,
      shardIt.shardId());
  forceFinishAsFailed(t);
}
@SuppressWarnings({"unchecked"}) void onOperation( @Nullable ShardRouting shard, final ShardIterator shardIt, int shardIndex, Throwable t) { // we set the shard failure always, even if its the first in the replication group, and the // next one // will work (it will just override it...) setFailure(shardIt, shardIndex, t); ShardRouting nextShard = shardIt.nextOrNull(); if (nextShard != null) { if (t != null) { if (logger.isTraceEnabled()) { if (!TransportActions.isShardNotAvailableException(t)) { if (shard != null) { logger.trace(shard.shortSummary() + ": Failed to execute [" + request + "]", t); } else { logger.trace(shardIt.shardId() + ": Failed to execute [" + request + "]", t); } } } } // we are not threaded here if we got here from the transport // or we possibly threaded if we got from a local threaded one, // in which case, the next shard in the partition will not be local one // so there is no meaning to this flag performOperation(shardIt, nextShard, shardIndex, true); } else { if (logger.isDebugEnabled()) { if (t != null) { if (!TransportActions.isShardNotAvailableException(t)) { if (shard != null) { logger.debug(shard.shortSummary() + ": Failed to execute [" + request + "]", t); } else { logger.debug(shardIt.shardId() + ": Failed to execute [" + request + "]", t); } } } } if (expectedOps == counterOps.incrementAndGet()) { finishHim(); } } }
private void onFirstPhaseResult(
    int shardIndex, String nodeId, FirstResult result, ShardIterator shardIt) {
  result.shardTarget(new SearchShardTarget(nodeId, shardIt.shardId()));
  processFirstPhaseResult(shardIndex, result);
  // we need to increment successful ops before we compare against the exit condition; otherwise,
  // if we are fast, we could concurrently update totalOps but then preempt one of the threads,
  // which can cause the successor to read a wrong value from successfulOps if the second phase
  // is very fast, e.g. count etc.
  successfulOps.incrementAndGet();
  // increment all the "future" shards to update the total ops, since some may work and some may
  // not... and when that happens, we break on total ops, so we must maintain them
  final int xTotalOps = totalOps.addAndGet(shardIt.remaining() + 1);
  if (xTotalOps == expectedTotalOps) {
    try {
      innerMoveToSecondPhase();
    } catch (Exception e) {
      if (logger.isDebugEnabled()) {
        logger.debug(
            (Supplier<?>)
                () ->
                    new ParameterizedMessage(
                        "{}: Failed to execute [{}] while moving to second phase",
                        shardIt.shardId(),
                        request),
            e);
      }
      raiseEarlyFailure(
          new ReduceSearchPhaseException(firstPhaseName(), "", e, buildShardFailures()));
    }
  } else if (xTotalOps > expectedTotalOps) {
    raiseEarlyFailure(
        new IllegalStateException(
            "unexpected higher total ops [" + xTotalOps + "] compared to expected ["
                + expectedTotalOps + "]"));
  }
}
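The remaining() + 1 accounting above is easy to misread; a worked micro-example of the arithmetic, assuming a group with three copies (hypothetical numbers):

// Suppose a shard group has 3 copies, so the group contributes 3 to expectedTotalOps.
// - Copy #1 responds successfully first: remaining() == 2, so totalOps += 2 + 1 == 3
//   and the whole group is accounted for in one step; the other copies are never tried.
// - On the failure path (the Exception overload of onFirstPhaseResult), totalOps is
//   incremented by only 1 per attempt, because the next copy of the same group still
//   gets a try. Either way each group contributes exactly its copy count to totalOps.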
@Override
public Routing getRouting(WhereClause whereClause, @Nullable String preference) {
  Map<String, Map<String, List<Integer>>> locations = new TreeMap<>();
  GroupShardsIterator shardIterators =
      clusterService
          .operationRouting()
          .searchShards(
              clusterService.state(), Strings.EMPTY_ARRAY, new String[] {index}, null, preference);
  ShardRouting shardRouting;
  for (ShardIterator shardIterator : shardIterators) {
    shardRouting = shardIterator.nextOrNull();
    processShardRouting(locations, shardRouting, shardIterator.shardId());
  }
  return new Routing(locations);
}
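processShardRouting is not shown above; a hedged sketch of what such a helper typically does. This is a hypothetical implementation - the nested map shape (nodeId -> indexName -> shard ids) is inferred from the locations type, and real code may handle a null routing differently:

// Hypothetical sketch: files each shard copy under its node and index.
// A null routing (no active copy in the group) is simply skipped here.
private void processShardRouting(
    Map<String, Map<String, List<Integer>>> locations, ShardRouting shardRouting, ShardId shardId) {
  if (shardRouting == null) {
    return;
  }
  String nodeId = shardRouting.currentNodeId();
  Map<String, List<Integer>> indexMap = locations.get(nodeId);
  if (indexMap == null) {
    locations.put(nodeId, indexMap = new TreeMap<>());
  }
  List<Integer> shards = indexMap.get(shardId.getIndex());
  if (shards == null) {
    indexMap.put(shardId.getIndex(), shards = new ArrayList<>());
  }
  shards.add(shardId.getId());
}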
protected ShardRouting resolvePrimary(ShardIterator shardIt) {
  // the iterator may be empty, e.g. in the window between index gateway recovery and shard
  // routing initialization
  ShardRouting shard;
  while ((shard = shardIt.nextOrNull()) != null) {
    // we only deal with the primary shard here...
    if (shard.primary()) {
      return shard;
    }
  }
  return null;
}
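A hedged usage sketch: because resolvePrimary consumes the iterator via nextOrNull(), callers hand it a fresh iterator. Here it is wrapped around a routing list, reusing the PlainShardIterator constructor and retryBecauseUnavailable from the examples above (shardId and routings are hypothetical local variables for one shard group):

// Hypothetical call site: `routings` is a List<ShardRouting> for one shard group.
ShardIterator shardIt = new PlainShardIterator(shardId, routings);
ShardRouting primary = resolvePrimary(shardIt);
if (primary == null) {
  // no primary in the group yet - the doRun() example above schedules a retry here
  retryBecauseUnavailable(shardIt.shardId(), "No active shards.");
}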
private ShardRouting corruptRandomPrimaryFile(final boolean includePerCommitFiles)
    throws IOException {
  ClusterState state = client().admin().cluster().prepareState().get().getState();
  Index test = state.metaData().index("test").getIndex();
  GroupShardsIterator shardIterators =
      state.getRoutingTable().activePrimaryShardsGrouped(new String[] {"test"}, false);
  List<ShardIterator> iterators = iterableAsArrayList(shardIterators);
  ShardIterator shardIterator = RandomPicks.randomFrom(random(), iterators);
  ShardRouting shardRouting = shardIterator.nextOrNull();
  assertNotNull(shardRouting);
  assertTrue(shardRouting.primary());
  assertTrue(shardRouting.assignedToNode());
  String nodeId = shardRouting.currentNodeId();
  NodesStatsResponse nodeStatses =
      client().admin().cluster().prepareNodesStats(nodeId).setFs(true).get();
  Set<Path> files = new TreeSet<>(); // treeset makes sure iteration order is deterministic
  for (FsInfo.Path info : nodeStatses.getNodes().get(0).getFs()) {
    String path = info.getPath();
    Path file =
        PathUtils.get(path)
            .resolve("indices")
            .resolve(test.getUUID())
            .resolve(Integer.toString(shardRouting.getId()))
            .resolve("index");
    if (Files.exists(file)) { // multi data path might only have one path in use
      try (DirectoryStream<Path> stream = Files.newDirectoryStream(file)) {
        for (Path item : stream) {
          if (Files.isRegularFile(item)
              && "write.lock".equals(item.getFileName().toString()) == false) {
            if (includePerCommitFiles || isPerSegmentFile(item.getFileName().toString())) {
              files.add(item);
            }
          }
        }
      }
    }
  }
  pruneOldDeleteGenerations(files);
  CorruptionUtils.corruptFile(random(), files.toArray(new Path[0]));
  return shardRouting;
}
void performFirstPhase(
    final int shardIndex, final ShardIterator shardIt, final ShardRouting shard) {
  if (shard == null) {
    // no more active shards... (we should not really get here, but just for safety)
    onFirstPhaseResult(
        shardIndex, null, null, shardIt, new NoShardAvailableActionException(shardIt.shardId()));
  } else {
    final DiscoveryNode node = nodeIdToDiscoveryNode.apply(shard.currentNodeId());
    if (node == null) {
      onFirstPhaseResult(
          shardIndex, shard, null, shardIt, new NoShardAvailableActionException(shardIt.shardId()));
    } else {
      AliasFilter filter = this.aliasFilter.get(shard.index().getUUID());
      assert filter != null;
      ShardSearchTransportRequest transportRequest =
          new ShardSearchTransportRequest(
              request, shardIt.shardId(), shardsIts.size(), filter, startTime());
      sendExecuteFirstPhase(
          node,
          transportRequest,
          new ActionListener<FirstResult>() {
            @Override
            public void onResponse(FirstResult result) {
              onFirstPhaseResult(shardIndex, shard.currentNodeId(), result, shardIt);
            }

            @Override
            public void onFailure(Exception t) {
              onFirstPhaseResult(shardIndex, shard, node.getId(), shardIt, t);
            }
          });
    }
  }
}
@SuppressWarnings({"unchecked"}) void onOperation( @Nullable ShardRouting shard, final ShardIterator shardIt, int shardIndex, Throwable t) { // we set the shard failure always, even if its the first in the replication group, and the // next one // will work (it will just override it...) setFailure(shardIt, shardIndex, t); ShardRouting nextShard = shardIt.nextOrNull(); if (nextShard != null) { if (t != null) { if (logger.isTraceEnabled()) { if (!TransportActions.isShardNotAvailableException(t)) { logger.trace( "{}: failed to execute [{}]", t, shard != null ? shard.shortSummary() : shardIt.shardId(), request); } } } performOperation(shardIt, nextShard, shardIndex); } else { if (logger.isDebugEnabled()) { if (t != null) { if (!TransportActions.isShardNotAvailableException(t)) { logger.debug( "{}: failed to execute [{}]", t, shard != null ? shard.shortSummary() : shardIt.shardId(), request); } } } if (expectedOps == counterOps.incrementAndGet()) { finishHim(); } } }
public void start() {
  if (shardsIts.size() == 0) {
    // no shards
    try {
      listener.onResponse(newResponse(request, new AtomicReferenceArray(0), clusterState));
    } catch (Throwable e) {
      listener.onFailure(e);
    }
    return; // nothing left to do once the empty response has been sent
  }
  request.beforeStart();
  // count the local operations, and perform the non local ones
  int localOperations = 0;
  int shardIndex = -1;
  for (final ShardIterator shardIt : shardsIts) {
    shardIndex++;
    final ShardRouting shard = shardIt.firstOrNull();
    if (shard != null) {
      if (shard.currentNodeId().equals(nodes.localNodeId())) {
        localOperations++;
      } else {
        // do the remote operation here, the localAsync flag is not relevant
        performOperation(shardIt, shardIndex, true);
      }
    } else {
      // really, no shards active in this group
      onOperation(null, shardIt, shardIndex, new NoShardAvailableActionException(shardIt.shardId()));
    }
  }
  // we have local operations, perform them now
  if (localOperations > 0) {
    if (request.operationThreading() == BroadcastOperationThreading.SINGLE_THREAD) {
      request.beforeLocalFork();
      threadPool
          .executor(executor)
          .execute(
              new Runnable() {
                @Override
                public void run() {
                  int shardIndex = -1;
                  for (final ShardIterator shardIt : shardsIts) {
                    shardIndex++;
                    final ShardRouting shard = shardIt.firstOrNull();
                    if (shard != null) {
                      if (shard.currentNodeId().equals(nodes.localNodeId())) {
                        performOperation(shardIt, shardIndex, false);
                      }
                    }
                  }
                }
              });
    } else {
      boolean localAsync =
          request.operationThreading() == BroadcastOperationThreading.THREAD_PER_SHARD;
      if (localAsync) {
        request.beforeLocalFork();
      }
      shardIndex = -1;
      for (final ShardIterator shardIt : shardsIts) {
        shardIndex++;
        final ShardRouting shard = shardIt.firstOrNull();
        if (shard != null) {
          if (shard.currentNodeId().equals(nodes.localNodeId())) {
            performOperation(shardIt, shardIndex, localAsync);
          }
        }
      }
    }
  }
}
void performOnReplica(
    final PrimaryResponse<Response, ReplicaRequest> response,
    final AtomicInteger counter,
    final ShardRouting shard,
    String nodeId) {
  // if we don't have that node, it means that it might have failed and will be created again;
  // in this case, we don't have to do the operation, and just let it failover
  if (!nodes.nodeExists(nodeId)) {
    if (counter.decrementAndGet() == 0) {
      listener.onResponse(response.response());
    }
    return;
  }
  final ReplicaOperationRequest shardRequest =
      new ReplicaOperationRequest(shardIt.shardId().id(), response.replicaRequest());
  if (!nodeId.equals(nodes.localNodeId())) {
    DiscoveryNode node = nodes.get(nodeId);
    transportService.sendRequest(
        node,
        transportReplicaAction,
        shardRequest,
        transportOptions,
        new VoidTransportResponseHandler(ThreadPool.Names.SAME) {
          @Override
          public void handleResponse(VoidStreamable vResponse) {
            finishIfPossible();
          }

          @Override
          public void handleException(TransportException exp) {
            if (!ignoreReplicaException(exp.unwrapCause())) {
              logger.warn(
                  "Failed to perform " + transportAction + " on replica " + shardIt.shardId(), exp);
              shardStateAction.shardFailed(
                  shard,
                  "Failed to perform ["
                      + transportAction
                      + "] on replica, message ["
                      + detailedMessage(exp)
                      + "]");
            }
            finishIfPossible();
          }

          private void finishIfPossible() {
            if (counter.decrementAndGet() == 0) {
              listener.onResponse(response.response());
            }
          }
        });
  } else {
    if (request.operationThreaded()) {
      request.beforeLocalFork();
      threadPool
          .executor(executor)
          .execute(
              new Runnable() {
                @Override
                public void run() {
                  try {
                    shardOperationOnReplica(shardRequest);
                  } catch (Exception e) {
                    if (!ignoreReplicaException(e)) {
                      logger.warn(
                          "Failed to perform " + transportAction + " on replica " + shardIt.shardId(),
                          e);
                      shardStateAction.shardFailed(
                          shard,
                          "Failed to perform ["
                              + transportAction
                              + "] on replica, message ["
                              + detailedMessage(e)
                              + "]");
                    }
                  }
                  if (counter.decrementAndGet() == 0) {
                    listener.onResponse(response.response());
                  }
                }
              });
    } else {
      try {
        shardOperationOnReplica(shardRequest);
      } catch (Exception e) {
        if (!ignoreReplicaException(e)) {
          logger.warn(
              "Failed to perform " + transportAction + " on replica " + shardIt.shardId(), e);
          shardStateAction.shardFailed(
              shard,
              "Failed to perform ["
                  + transportAction
                  + "] on replica, message ["
                  + detailedMessage(e)
                  + "]");
        }
      }
      if (counter.decrementAndGet() == 0) {
        listener.onResponse(response.response());
      }
    }
  }
}
protected boolean doStart() {
  nodes = observer.observedState().nodes();
  try {
    ClusterBlockException blockException = checkGlobalBlock(observer.observedState());
    if (blockException != null) {
      if (blockException.retryable()) {
        retry(blockException);
        return false;
      } else {
        throw blockException;
      }
    }
    internalRequest.concreteIndex(
        observer
            .observedState()
            .metaData()
            .concreteSingleIndex(
                internalRequest.request().index(), internalRequest.request().indicesOptions()));
    // check if we need to execute at all, and if not, return
    if (!resolveRequest(observer.observedState(), internalRequest, listener)) {
      return true;
    }
    blockException = checkRequestBlock(observer.observedState(), internalRequest);
    if (blockException != null) {
      if (blockException.retryable()) {
        retry(blockException);
        return false;
      } else {
        throw blockException;
      }
    }
    shardIt = shards(observer.observedState(), internalRequest);
  } catch (Throwable e) {
    listener.onFailure(e);
    return true;
  }
  // the iterator may be empty, e.g. in the window between index gateway recovery and shard
  // routing initialization
  if (shardIt.size() == 0) {
    retry(null);
    return false;
  }
  // this transport only makes sense with an iterator that returns a single shard routing (like
  // the primary)
  assert shardIt.size() == 1;
  ShardRouting shard = shardIt.nextOrNull();
  assert shard != null;
  if (!shard.active()) {
    retry(null);
    return false;
  }
  if (!operationStarted.compareAndSet(false, true)) {
    return true;
  }
  internalRequest.request().shardId = shardIt.shardId().id();
  if (shard.currentNodeId().equals(nodes.localNodeId())) {
    internalRequest.request().beforeLocalFork();
    try {
      threadPool
          .executor(executor)
          .execute(
              new Runnable() {
                @Override
                public void run() {
                  try {
                    shardOperation(internalRequest, listener);
                  } catch (Throwable e) {
                    if (retryOnFailure(e)) {
                      operationStarted.set(false);
                      // we already marked the operation as started when we executed it (and
                      // removed the listener), so retry to re-add it to the cluster listener
                      retry(null);
                    } else {
                      listener.onFailure(e);
                    }
                  }
                }
              });
    } catch (Throwable e) {
      if (retryOnFailure(e)) {
        retry(null);
      } else {
        listener.onFailure(e);
      }
    }
  } else {
    DiscoveryNode node = nodes.get(shard.currentNodeId());
    transportService.sendRequest(
        node,
        actionName,
        internalRequest.request(),
        transportOptions(),
        new BaseTransportResponseHandler<Response>() {
          @Override
          public Response newInstance() {
            return newResponse();
          }

          @Override
          public String executor() {
            return ThreadPool.Names.SAME;
          }

          @Override
          public void handleResponse(Response response) {
            listener.onResponse(response);
          }

          @Override
          public void handleException(TransportException exp) {
            // if we got disconnected from the node, or the node / shard is not in the right
            // state (being closed)
            if (exp.unwrapCause() instanceof ConnectTransportException
                || exp.unwrapCause() instanceof NodeClosedException
                || retryOnFailure(exp)) {
              operationStarted.set(false);
              // we already marked the operation as started when we executed it (and removed the
              // listener), so retry to re-add it to the cluster listener
              retry(null);
            } else {
              listener.onFailure(exp);
            }
          }
        });
  }
  return true;
}
/**
 * Tests corruption that happens on a single shard when no replicas are present. We make sure
 * that the primary stays unassigned and that replica allocation for the healthy shards still
 * happens.
 */
public void testCorruptPrimaryNoReplica()
    throws ExecutionException, InterruptedException, IOException {
  int numDocs = scaledRandomIntBetween(100, 1000);
  internalCluster().ensureAtLeastNumDataNodes(2);
  assertAcked(
      prepareCreate("test")
          .setSettings(
              Settings.builder()
                  .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "0")
                  .put(MergePolicyConfig.INDEX_MERGE_ENABLED, false)
                  // no checkindex - we corrupt shards on purpose
                  .put(MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), false)
                  // no translog-based flush - it might change the .liv / segments.N files
                  .put(
                      IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(),
                      new ByteSizeValue(1, ByteSizeUnit.PB))));
  ensureGreen();
  IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
  for (int i = 0; i < builders.length; i++) {
    builders[i] = client().prepareIndex("test", "type").setSource("field", "value");
  }
  indexRandom(true, builders);
  ensureGreen();
  // we have to flush at least once here since we don't corrupt the translog
  assertAllSuccessful(
      client()
          .admin()
          .indices()
          .prepareFlush()
          .setForce(true)
          .setWaitIfOngoing(true)
          .execute()
          .actionGet());
  SearchResponse countResponse = client().prepareSearch().setSize(0).get();
  assertHitCount(countResponse, numDocs);

  ShardRouting shardRouting = corruptRandomPrimaryFile();
  /*
   * we corrupted the primary shard - now lets make sure we never recover from it successfully
   */
  Settings build = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "1").build();
  client().admin().indices().prepareUpdateSettings("test").setSettings(build).get();
  client().admin().cluster().prepareReroute().get();

  boolean didClusterTurnRed =
      awaitBusy(
          () -> {
            ClusterHealthStatus test =
                client()
                    .admin()
                    .cluster()
                    .health(Requests.clusterHealthRequest("test"))
                    .actionGet()
                    .getStatus();
            return test == ClusterHealthStatus.RED;
          },
          5,
          TimeUnit.MINUTES); // sometimes on slow nodes the replication / recovery is just dead slow
  final ClusterHealthResponse response =
      client().admin().cluster().health(Requests.clusterHealthRequest("test")).get();
  if (response.getStatus() != ClusterHealthStatus.RED) {
    logger.info("Cluster turned red in busy loop: {}", didClusterTurnRed);
    logger.info(
        "cluster state:\n{}\n{}",
        client().admin().cluster().prepareState().get().getState().prettyPrint(),
        client().admin().cluster().preparePendingClusterTasks().get().prettyPrint());
  }
  assertThat(response.getStatus(), is(ClusterHealthStatus.RED));
  ClusterState state = client().admin().cluster().prepareState().get().getState();
  GroupShardsIterator shardIterators =
      state.getRoutingTable().activePrimaryShardsGrouped(new String[] {"test"}, false);
  for (ShardIterator iterator : shardIterators) {
    ShardRouting routing;
    while ((routing = iterator.nextOrNull()) != null) {
      if (routing.getId() == shardRouting.getId()) {
        assertThat(routing.state(), equalTo(ShardRoutingState.UNASSIGNED));
      } else {
        assertThat(
            routing.state(),
            anyOf(equalTo(ShardRoutingState.RELOCATING), equalTo(ShardRoutingState.STARTED)));
      }
    }
  }
  final List<Path> files = listShardFiles(shardRouting);
  Path corruptedFile = null;
  for (Path file : files) {
    if (file.getFileName().toString().startsWith("corrupted_")) {
      corruptedFile = file;
      break;
    }
  }
  assertThat(corruptedFile, notNullValue());
}
@Override
protected void doExecute(final Request request, final ActionListener<Response> listener) {
  ClusterState clusterState = clusterService.state();
  ClusterBlockException blockException = checkGlobalBlock(clusterState, request);
  if (blockException != null) {
    throw blockException;
  }
  // update to concrete index
  request.index(
      clusterState.metaData().concreteSingleIndex(request.index(), request.indicesOptions()));
  blockException = checkRequestBlock(clusterState, request);
  if (blockException != null) {
    throw blockException;
  }
  GroupShardsIterator groups;
  try {
    groups = shards(request);
  } catch (Throwable e) {
    listener.onFailure(e);
    return;
  }
  final AtomicInteger indexCounter = new AtomicInteger();
  final AtomicInteger failureCounter = new AtomicInteger();
  final AtomicInteger completionCounter = new AtomicInteger(groups.size());
  final AtomicReferenceArray<ShardActionResult> shardsResponses =
      new AtomicReferenceArray<>(groups.size());
  for (final ShardIterator shardIt : groups) {
    ShardRequest shardRequest = newShardRequestInstance(request, shardIt.shardId().id());
    // TODO for now, we fork operations on the shards of the index
    shardRequest.beforeLocalFork(); // optimize for local fork
    shardRequest.operationThreaded(true);
    // no need for a threaded listener, we will fork when it's done based on the index request
    shardRequest.listenerThreaded(false);
    shardAction.execute(
        shardRequest,
        new ActionListener<ShardResponse>() {
          @Override
          public void onResponse(ShardResponse result) {
            shardsResponses.set(indexCounter.getAndIncrement(), new ShardActionResult(result));
            returnIfNeeded();
          }

          @Override
          public void onFailure(Throwable e) {
            failureCounter.getAndIncrement();
            int index = indexCounter.getAndIncrement();
            if (accumulateExceptions()) {
              shardsResponses.set(
                  index,
                  new ShardActionResult(
                      new DefaultShardOperationFailedException(
                          request.index, shardIt.shardId().id(), e)));
            }
            returnIfNeeded();
          }

          private void returnIfNeeded() {
            if (completionCounter.decrementAndGet() == 0) {
              List<ShardResponse> responses = Lists.newArrayList();
              List<ShardOperationFailedException> failures = Lists.newArrayList();
              for (int i = 0; i < shardsResponses.length(); i++) {
                ShardActionResult shardActionResult = shardsResponses.get(i);
                if (shardActionResult == null) {
                  assert !accumulateExceptions();
                  continue;
                }
                if (shardActionResult.isFailure()) {
                  assert accumulateExceptions() && shardActionResult.shardFailure != null;
                  failures.add(shardActionResult.shardFailure);
                } else {
                  responses.add(shardActionResult.shardResponse);
                }
              }
              assert failures.size() == 0 || failures.size() == failureCounter.get();
              listener.onResponse(
                  newResponseInstance(request, responses, failureCounter.get(), failures));
            }
          }
        });
  }
}
void performReplicas(final PrimaryResponse<Response, ReplicaRequest> response) {
  if (ignoreReplicas() || shardIt.size() == 1 /* no replicas */) {
    postPrimaryOperation(request, response);
    listener.onResponse(response.response());
    return;
  }
  // initialize the counter
  int replicaCounter = shardIt.assignedReplicasIncludingRelocating();
  if (replicaCounter == 0) {
    postPrimaryOperation(request, response);
    listener.onResponse(response.response());
    return;
  }
  if (replicationType == ReplicationType.ASYNC) {
    postPrimaryOperation(request, response);
    // async replication, notify the listener
    listener.onResponse(response.response());
    // now, trick the counter so it won't decrease to 0 and notify the listeners
    replicaCounter = Integer.MIN_VALUE;
  }
  // we add one to the replica count to do the postPrimaryOperation
  replicaCounter++;
  AtomicInteger counter = new AtomicInteger(replicaCounter);
  shardIt.reset(); // reset the iterator
  ShardRouting shard;
  while ((shard = shardIt.nextOrNull()) != null) {
    // if it's unassigned, there is nothing to do here...
    if (shard.unassigned()) {
      continue;
    }
    // if the shard is primary and relocating, add one to the counter since we perform it on the
    // replica as well (and we already did it on the primary)
    boolean doOnlyOnRelocating = false;
    if (shard.primary()) {
      if (shard.relocating()) {
        doOnlyOnRelocating = true;
      } else {
        continue;
      }
    }
    // we also index on a replica that is still initializing, since we might not have received
    // the event that it was started yet. We will get an IllegalIndexShardStateException if it's
    // not started, and that's fine - we will ignore it.
    if (!doOnlyOnRelocating) {
      performOnReplica(response, counter, shard, shard.currentNodeId());
    }
    if (shard.relocating()) {
      performOnReplica(response, counter, shard, shard.relocatingNodeId());
    }
  }
  // now do the postPrimary operation, and check if the listener needs to be invoked
  postPrimaryOperation(request, response);
  // we also invoke here in case replicas finish before postPrimaryAction does
  if (counter.decrementAndGet() == 0) {
    listener.onResponse(response.response());
  }
}
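One subtlety above: for async replication the counter is parked at Integer.MIN_VALUE so that the later decrementAndGet() calls can never reach zero and re-notify the already-notified listener. A minimal self-contained illustration of that trick:

import java.util.concurrent.atomic.AtomicInteger;

final class CounterTrick {
  public static void main(String[] args) {
    // mirrors the ASYNC branch above: listener already notified, so start the
    // counter at MIN_VALUE (+1 for the postPrimaryOperation slot); per-replica
    // decrements can never hit 0 for any realistic replica count.
    AtomicInteger counter = new AtomicInteger(Integer.MIN_VALUE + 1);
    for (int replica = 0; replica < 3; replica++) {
      boolean shouldNotify = counter.decrementAndGet() == 0;
      System.out.println("replica " + replica + " -> notify listener? " + shouldNotify); // false
    }
  }
}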
ASyncAction(
    MultiPercolateRequest multiPercolateRequest,
    List<Object> percolateRequests,
    ActionListener<MultiPercolateResponse> finalListener,
    ClusterState clusterState) {
  this.finalListener = finalListener;
  this.multiPercolateRequest = multiPercolateRequest;
  this.percolateRequests = percolateRequests;
  responsesByItemAndShard = new AtomicReferenceArray<>(percolateRequests.size());
  expectedOperationsPerItem = new AtomicReferenceArray<>(percolateRequests.size());
  reducedResponses = new AtomicArray<>(percolateRequests.size());

  // Resolving concrete indices and routing, and grouping the requests by shard
  requestsByShard = new HashMap<>();
  // Keep track of which slots belong to which shard, in case a request to a shard fails on all
  // copies
  shardToSlots = new HashMap<>();
  int expectedResults = 0;
  for (int slot = 0; slot < percolateRequests.size(); slot++) {
    Object element = percolateRequests.get(slot);
    assert element != null;
    if (element instanceof PercolateRequest) {
      PercolateRequest percolateRequest = (PercolateRequest) element;
      String[] concreteIndices;
      try {
        concreteIndices =
            indexNameExpressionResolver.concreteIndices(clusterState, percolateRequest);
      } catch (IndexNotFoundException e) {
        reducedResponses.set(slot, e);
        responsesByItemAndShard.set(slot, new AtomicReferenceArray(0));
        expectedOperationsPerItem.set(slot, new AtomicInteger(0));
        continue;
      }
      Map<String, Set<String>> routing =
          indexNameExpressionResolver.resolveSearchRouting(
              clusterState, percolateRequest.routing(), percolateRequest.indices());
      // TODO: I only need shardIds, ShardIterator(ShardRouting) is only needed in
      // TransportShardMultiPercolateAction
      GroupShardsIterator shards =
          clusterService
              .operationRouting()
              .searchShards(clusterState, concreteIndices, routing, percolateRequest.preference());
      if (shards.size() == 0) {
        reducedResponses.set(slot, new UnavailableShardsException(null, "No shards available"));
        responsesByItemAndShard.set(slot, new AtomicReferenceArray(0));
        expectedOperationsPerItem.set(slot, new AtomicInteger(0));
        continue;
      }
      // The shard id is used as the index into the atomic ref array, so we need to find out how
      // many shards there are regardless of routing:
      int numShards =
          clusterService.operationRouting().searchShardsCount(clusterState, concreteIndices, null);
      responsesByItemAndShard.set(slot, new AtomicReferenceArray(numShards));
      expectedOperationsPerItem.set(slot, new AtomicInteger(shards.size()));
      for (ShardIterator shard : shards) {
        ShardId shardId = shard.shardId();
        TransportShardMultiPercolateAction.Request requests = requestsByShard.get(shardId);
        if (requests == null) {
          requestsByShard.put(
              shardId,
              requests =
                  new TransportShardMultiPercolateAction.Request(
                      multiPercolateRequest,
                      shardId.getIndex(),
                      shardId.getId(),
                      percolateRequest.preference()));
        }
        logger.trace("Adding shard[{}] percolate request for item[{}]", shardId, slot);
        requests.add(
            new TransportShardMultiPercolateAction.Request.Item(
                slot, new PercolateShardRequest(shardId, percolateRequest)));
        IntArrayList items = shardToSlots.get(shardId);
        if (items == null) {
          shardToSlots.put(shardId, items = new IntArrayList());
        }
        items.add(slot);
      }
      expectedResults++;
    } else if (element instanceof Throwable || element instanceof MultiGetResponse.Failure) {
      logger.trace("item[{}] won't be executed, reason: {}", slot, element);
      reducedResponses.set(slot, element);
      responsesByItemAndShard.set(slot, new AtomicReferenceArray(0));
      expectedOperationsPerItem.set(slot, new AtomicInteger(0));
    }
  }
  expectedOperations = new AtomicInteger(expectedResults);
}
void performOperation(final ShardIterator shardIt, int shardIndex, boolean localAsync) {
  performOperation(shardIt, shardIt.nextOrNull(), shardIndex, localAsync);
}
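A brief note on this pair of overloads: the three-argument form simply advances the iterator once and delegates, so failure handling later resumes from the same iterator. An equivalent expansion, assuming the surrounding shardIt, shardIndex, and localAsync are in scope:

// Equivalent expansion of the delegating overload above: the iterator is
// advanced once here; onOperation(...) later calls shardIt.nextOrNull() again
// to fail over to the remaining copies of the same group.
ShardRouting first = shardIt.nextOrNull(); // first candidate copy (may be null)
performOperation(shardIt, first, shardIndex, localAsync); // null takes the NoShardAvailableActionException path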
void performOperation(
    final ShardIterator shardIt,
    final ShardRouting shard,
    final int shardIndex,
    boolean localAsync) {
  if (shard == null) {
    // no more active shards... (we should not really get here, but just for safety)
    onOperation(null, shardIt, shardIndex, new NoShardAvailableActionException(shardIt.shardId()));
  } else {
    try {
      final ShardRequest shardRequest = newShardRequest(shard, request);
      if (shard.currentNodeId().equals(nodes.localNodeId())) {
        if (localAsync) {
          threadPool
              .executor(executor)
              .execute(
                  new Runnable() {
                    @Override
                    public void run() {
                      try {
                        onOperation(shard, shardIndex, shardOperation(shardRequest));
                      } catch (Throwable e) {
                        onOperation(shard, shardIt, shardIndex, e);
                      }
                    }
                  });
        } else {
          onOperation(shard, shardIndex, shardOperation(shardRequest));
        }
      } else {
        DiscoveryNode node = nodes.get(shard.currentNodeId());
        if (node == null) {
          // no node connected, act as failure
          onOperation(
              shard, shardIt, shardIndex, new NoShardAvailableActionException(shardIt.shardId()));
        } else {
          transportService.sendRequest(
              node,
              transportShardAction,
              shardRequest,
              new BaseTransportResponseHandler<ShardResponse>() {
                @Override
                public ShardResponse newInstance() {
                  return newShardResponse();
                }

                @Override
                public String executor() {
                  return ThreadPool.Names.SAME;
                }

                @Override
                public void handleResponse(ShardResponse response) {
                  onOperation(shard, shardIndex, response);
                }

                @Override
                public void handleException(TransportException e) {
                  onOperation(shard, shardIt, shardIndex, e);
                }
              });
        }
      }
    } catch (Throwable e) {
      onOperation(shard, shardIt, shardIndex, e);
    }
  }
}
protected void doStart() {
  nodes = observer.observedState().nodes();
  try {
    ClusterBlockException blockException = checkGlobalBlock(observer.observedState());
    if (blockException != null) {
      if (blockException.retryable()) {
        retry(blockException);
        return;
      } else {
        throw blockException;
      }
    }
    request.concreteIndex(
        indexNameExpressionResolver.concreteSingleIndex(observer.observedState(), request).getName());
    resolveRequest(observer.observedState(), request);
    blockException = checkRequestBlock(observer.observedState(), request);
    if (blockException != null) {
      if (blockException.retryable()) {
        retry(blockException);
        return;
      } else {
        throw blockException;
      }
    }
    shardIt = shards(observer.observedState(), request);
  } catch (Throwable e) {
    listener.onFailure(e);
    return;
  }
  // the iterator may be empty, e.g. in the window between index gateway recovery and shard
  // routing initialization
  if (shardIt.size() == 0) {
    retry(null);
    return;
  }
  // this transport only makes sense with an iterator that returns a single shard routing (like
  // the primary)
  assert shardIt.size() == 1;
  ShardRouting shard = shardIt.nextOrNull();
  assert shard != null;
  if (!shard.active()) {
    retry(null);
    return;
  }
  request.shardId = shardIt.shardId();
  DiscoveryNode node = nodes.get(shard.currentNodeId());
  transportService.sendRequest(
      node,
      shardActionName,
      request,
      transportOptions(),
      new BaseTransportResponseHandler<Response>() {
        @Override
        public Response newInstance() {
          return newResponse();
        }

        @Override
        public String executor() {
          return ThreadPool.Names.SAME;
        }

        @Override
        public void handleResponse(Response response) {
          listener.onResponse(response);
        }

        @Override
        public void handleException(TransportException exp) {
          Throwable cause = exp.unwrapCause();
          // if we got disconnected from the node, or the node / shard is not in the right state
          // (being closed)
          if (cause instanceof ConnectTransportException
              || cause instanceof NodeClosedException
              || retryOnFailure(exp)) {
            retry(cause);
          } else {
            listener.onFailure(exp);
          }
        }
      });
}
private void onFirstPhaseResult(
    final int shardIndex,
    @Nullable ShardRouting shard,
    @Nullable String nodeId,
    final ShardIterator shardIt,
    Exception e) {
  // we always add the shard failure for a specific shard instance
  // we do make sure to clean it on a successful response from a shard
  SearchShardTarget shardTarget = new SearchShardTarget(nodeId, shardIt.shardId());
  addShardFailure(shardIndex, shardTarget, e);
  if (totalOps.incrementAndGet() == expectedTotalOps) {
    if (logger.isDebugEnabled()) {
      if (e != null && !TransportActions.isShardNotAvailableException(e)) {
        logger.debug(
            (Supplier<?>)
                () ->
                    new ParameterizedMessage(
                        "{}: Failed to execute [{}]",
                        shard != null ? shard.shortSummary() : shardIt.shardId(),
                        request),
            e);
      } else if (logger.isTraceEnabled()) {
        logger.trace(
            (Supplier<?>)
                () -> new ParameterizedMessage("{}: Failed to execute [{}]", shard, request),
            e);
      }
    }
    final ShardSearchFailure[] shardSearchFailures = buildShardFailures();
    if (successfulOps.get() == 0) {
      if (logger.isDebugEnabled()) {
        logger.debug(
            (Supplier<?>)
                () ->
                    new ParameterizedMessage("All shards failed for phase: [{}]", firstPhaseName()),
            e);
      }
      // no successful ops, raise an exception
      raiseEarlyFailure(
          new SearchPhaseExecutionException(
              firstPhaseName(), "all shards failed", e, shardSearchFailures));
    } else {
      try {
        innerMoveToSecondPhase();
      } catch (Exception inner) {
        inner.addSuppressed(e);
        raiseEarlyFailure(
            new ReduceSearchPhaseException(firstPhaseName(), "", inner, shardSearchFailures));
      }
    }
  } else {
    final ShardRouting nextShard = shardIt.nextOrNull();
    final boolean lastShard = nextShard == null;
    // trace log this exception
    logger.trace(
        (Supplier<?>)
            () ->
                new ParameterizedMessage(
                    "{}: Failed to execute [{}] lastShard [{}]",
                    shard != null ? shard.shortSummary() : shardIt.shardId(),
                    request,
                    lastShard),
        e);
    if (!lastShard) {
      try {
        performFirstPhase(shardIndex, shardIt, nextShard);
      } catch (Exception inner) {
        inner.addSuppressed(e);
        onFirstPhaseResult(shardIndex, shard, shard.currentNodeId(), shardIt, inner);
      }
    } else {
      // no more shards active, add a failure
      if (logger.isDebugEnabled() && !logger.isTraceEnabled()) { // do not double log this exception
        if (e != null && !TransportActions.isShardNotAvailableException(e)) {
          logger.debug(
              (Supplier<?>)
                  () ->
                      new ParameterizedMessage(
                          "{}: Failed to execute [{}] lastShard [{}]",
                          shard != null ? shard.shortSummary() : shardIt.shardId(),
                          request,
                          lastShard),
              e);
        }
      }
    }
  }
}
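When retrying the next copy fails too, the example above chains the earlier failure onto the new one with addSuppressed, so diagnostics keep the full history. A self-contained illustration of that standard JDK mechanism:

public class SuppressedDemo {
  public static void main(String[] args) {
    Exception original = new RuntimeException("shard copy #1 failed");
    Exception retryFailure = new IllegalStateException("retry on copy #2 failed");
    retryFailure.addSuppressed(original); // keep the first failure attached
    for (Throwable t : retryFailure.getSuppressed()) {
      System.out.println("suppressed: " + t.getMessage()); // shard copy #1 failed
    }
  }
}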
private void executeBulk(
    final BulkRequest bulkRequest,
    final long startTime,
    final ActionListener<BulkResponse> listener,
    final AtomicArray<BulkItemResponse> responses) {
  final ClusterState clusterState = clusterService.state();
  // TODO use timeout to wait here if it's blocked...
  clusterState.blocks().globalBlockedRaiseException(ClusterBlockLevel.WRITE);
  final ConcreteIndices concreteIndices =
      new ConcreteIndices(clusterState, indexNameExpressionResolver);
  MetaData metaData = clusterState.metaData();
  for (int i = 0; i < bulkRequest.requests.size(); i++) {
    ActionRequest request = bulkRequest.requests.get(i);
    if (request instanceof DocumentRequest) {
      DocumentRequest req = (DocumentRequest) request;
      if (addFailureIfIndexIsUnavailable(req, bulkRequest, responses, i, concreteIndices, metaData)) {
        continue;
      }
      String concreteIndex = concreteIndices.resolveIfAbsent(req);
      if (request instanceof IndexRequest) {
        IndexRequest indexRequest = (IndexRequest) request;
        MappingMetaData mappingMd = null;
        if (metaData.hasIndex(concreteIndex)) {
          mappingMd = metaData.index(concreteIndex).mappingOrDefault(indexRequest.type());
        }
        try {
          indexRequest.process(metaData, mappingMd, allowIdGeneration, concreteIndex);
        } catch (ElasticsearchParseException | RoutingMissingException e) {
          BulkItemResponse.Failure failure =
              new BulkItemResponse.Failure(concreteIndex, indexRequest.type(), indexRequest.id(), e);
          BulkItemResponse bulkItemResponse = new BulkItemResponse(i, "index", failure);
          responses.set(i, bulkItemResponse);
          // make sure the request never gets processed again
          bulkRequest.requests.set(i, null);
        }
      } else {
        concreteIndices.resolveIfAbsent(req);
        req.routing(
            clusterState.metaData().resolveIndexRouting(req.parent(), req.routing(), req.index()));
      }
    }
  }
  // first, go over all the requests and create a ShardId -> Operations mapping
  Map<ShardId, List<BulkItemRequest>> requestsByShard = new HashMap<>();
  for (int i = 0; i < bulkRequest.requests.size(); i++) {
    ActionRequest request = bulkRequest.requests.get(i);
    if (request instanceof IndexRequest) {
      IndexRequest indexRequest = (IndexRequest) request;
      String concreteIndex = concreteIndices.getConcreteIndex(indexRequest.index());
      ShardId shardId =
          clusterService
              .operationRouting()
              .indexShards(
                  clusterState,
                  concreteIndex,
                  indexRequest.type(),
                  indexRequest.id(),
                  indexRequest.routing())
              .shardId();
      List<BulkItemRequest> list = requestsByShard.get(shardId);
      if (list == null) {
        list = new ArrayList<>();
        requestsByShard.put(shardId, list);
      }
      list.add(new BulkItemRequest(i, request));
    } else if (request instanceof DeleteRequest) {
      DeleteRequest deleteRequest = (DeleteRequest) request;
      String concreteIndex = concreteIndices.getConcreteIndex(deleteRequest.index());
      MappingMetaData mappingMd =
          clusterState.metaData().index(concreteIndex).mappingOrDefault(deleteRequest.type());
      if (mappingMd != null && mappingMd.routing().required() && deleteRequest.routing() == null) {
        // if routing is required and there is no routing on the delete request, we need to
        // broadcast it to all shards....
        GroupShardsIterator groupShards =
            clusterService.operationRouting().broadcastDeleteShards(clusterState, concreteIndex);
        for (ShardIterator shardIt : groupShards) {
          List<BulkItemRequest> list = requestsByShard.get(shardIt.shardId());
          if (list == null) {
            list = new ArrayList<>();
            requestsByShard.put(shardIt.shardId(), list);
          }
          list.add(new BulkItemRequest(i, deleteRequest));
        }
      } else {
        ShardId shardId =
            clusterService
                .operationRouting()
                .indexShards(
                    clusterState,
                    concreteIndex,
                    deleteRequest.type(),
                    deleteRequest.id(),
                    deleteRequest.routing())
                .shardId();
        List<BulkItemRequest> list = requestsByShard.get(shardId);
        if (list == null) {
          list = new ArrayList<>();
          requestsByShard.put(shardId, list);
        }
        list.add(new BulkItemRequest(i, request));
      }
    } else if (request instanceof UpdateRequest) {
      UpdateRequest updateRequest = (UpdateRequest) request;
      String concreteIndex = concreteIndices.getConcreteIndex(updateRequest.index());
      MappingMetaData mappingMd =
          clusterState.metaData().index(concreteIndex).mappingOrDefault(updateRequest.type());
      if (mappingMd != null && mappingMd.routing().required() && updateRequest.routing() == null) {
        BulkItemResponse.Failure failure =
            new BulkItemResponse.Failure(
                updateRequest.index(),
                updateRequest.type(),
                updateRequest.id(),
                new IllegalArgumentException("routing is required for this item"));
        responses.set(i, new BulkItemResponse(i, updateRequest.type(), failure));
        continue;
      }
      ShardId shardId =
          clusterService
              .operationRouting()
              .indexShards(
                  clusterState,
                  concreteIndex,
                  updateRequest.type(),
                  updateRequest.id(),
                  updateRequest.routing())
              .shardId();
      List<BulkItemRequest> list = requestsByShard.get(shardId);
      if (list == null) {
        list = new ArrayList<>();
        requestsByShard.put(shardId, list);
      }
      list.add(new BulkItemRequest(i, request));
    }
  }
  if (requestsByShard.isEmpty()) {
    listener.onResponse(
        new BulkResponse(
            responses.toArray(new BulkItemResponse[responses.length()]),
            buildTookInMillis(startTime)));
    return;
  }
  final AtomicInteger counter = new AtomicInteger(requestsByShard.size());
  for (Map.Entry<ShardId, List<BulkItemRequest>> entry : requestsByShard.entrySet()) {
    final ShardId shardId = entry.getKey();
    final List<BulkItemRequest> requests = entry.getValue();
    BulkShardRequest bulkShardRequest =
        new BulkShardRequest(
            bulkRequest,
            shardId,
            bulkRequest.refresh(),
            requests.toArray(new BulkItemRequest[requests.size()]));
    bulkShardRequest.consistencyLevel(bulkRequest.consistencyLevel());
    bulkShardRequest.timeout(bulkRequest.timeout());
    shardBulkAction.execute(
        bulkShardRequest,
        new ActionListener<BulkShardResponse>() {
          @Override
          public void onResponse(BulkShardResponse bulkShardResponse) {
            for (BulkItemResponse bulkItemResponse : bulkShardResponse.getResponses()) {
              // we may have no response if the item failed
              if (bulkItemResponse.getResponse() != null) {
                bulkItemResponse.getResponse().setShardInfo(bulkShardResponse.getShardInfo());
              }
              responses.set(bulkItemResponse.getItemId(), bulkItemResponse);
            }
            if (counter.decrementAndGet() == 0) {
              finishHim();
            }
          }

          @Override
          public void onFailure(Throwable e) {
            // create failures for all relevant requests
            for (BulkItemRequest request : requests) {
              if (request.request() instanceof IndexRequest) {
                IndexRequest indexRequest = (IndexRequest) request.request();
                responses.set(
                    request.id(),
                    new BulkItemResponse(
                        request.id(),
                        indexRequest.opType().toString().toLowerCase(Locale.ENGLISH),
                        new BulkItemResponse.Failure(
                            concreteIndices.getConcreteIndex(indexRequest.index()),
                            indexRequest.type(),
                            indexRequest.id(),
                            e)));
              } else if (request.request() instanceof DeleteRequest) {
                DeleteRequest deleteRequest = (DeleteRequest) request.request();
                responses.set(
                    request.id(),
                    new BulkItemResponse(
                        request.id(),
                        "delete",
                        new BulkItemResponse.Failure(
                            concreteIndices.getConcreteIndex(deleteRequest.index()),
                            deleteRequest.type(),
                            deleteRequest.id(),
                            e)));
              } else if (request.request() instanceof UpdateRequest) {
                UpdateRequest updateRequest = (UpdateRequest) request.request();
                responses.set(
                    request.id(),
                    new BulkItemResponse(
                        request.id(),
                        "update",
                        new BulkItemResponse.Failure(
                            concreteIndices.getConcreteIndex(updateRequest.index()),
                            updateRequest.type(),
                            updateRequest.id(),
                            e)));
              }
            }
            if (counter.decrementAndGet() == 0) {
              finishHim();
            }
          }

          private void finishHim() {
            listener.onResponse(
                new BulkResponse(
                    responses.toArray(new BulkItemResponse[responses.length()]),
                    buildTookInMillis(startTime)));
          }
        });
  }
}
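Several examples above (the bulk action and the multi-percolate constructor) build a shard-to-requests map with the get-then-put-if-null pattern. On Java 8+ the same grouping can be written with Map.computeIfAbsent; a minimal self-contained sketch, with the ShardId key and BulkItemRequest values simplified to integers:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

final class GroupByShard {
  public static void main(String[] args) {
    // simplified stand-in for Map<ShardId, List<BulkItemRequest>> from the bulk example
    Map<Integer, List<Integer>> requestsByShard = new HashMap<>();
    int[] shardForItem = {0, 1, 0, 2, 1}; // item i routes to shard shardForItem[i]
    for (int i = 0; i < shardForItem.length; i++) {
      // replaces: list = map.get(k); if (list == null) { list = new ArrayList<>(); map.put(k, list); }
      requestsByShard.computeIfAbsent(shardForItem[i], k -> new ArrayList<>()).add(i);
    }
    System.out.println(requestsByShard); // {0=[0, 2], 1=[1, 4], 2=[3]}
  }
}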