/** * Perform phase 3 of the recovery process * * <p>Phase3 again takes a snapshot of the translog, however this time the snapshot is acquired * under a write lock. The translog operations are sent to the target node where they are * replayed. * * <p>{@code InternalEngine#recover} is responsible for taking the snapshot of the translog, and * after phase 3 completes the snapshots from all three phases are released. */ @Override public void phase3(Translog.Snapshot snapshot) throws ElasticsearchException { if (shard.state() == IndexShardState.CLOSED) { throw new IndexShardClosedException(request.shardId()); } cancellableThreads.checkForCancel(); StopWatch stopWatch = new StopWatch().start(); final int totalOperations; logger.trace( "[{}][{}] recovery [phase3] to {}: sending transaction log operations", indexName, shardId, request.targetNode()); // Send the translog operations to the target node totalOperations = sendSnapshot(snapshot); cancellableThreads.execute( new Interruptable() { @Override public void run() throws InterruptedException { // Send the FINALIZE request to the target node. The finalize request // clears unreferenced translog files, refreshes the engine now that // new segments are available, and enables garbage collection of // tombstone files. The shard is also moved to the POST_RECOVERY phase // during this time transportService .submitRequest( request.targetNode(), RecoveryTarget.Actions.FINALIZE, new RecoveryFinalizeRecoveryRequest(request.recoveryId(), request.shardId()), TransportRequestOptions.options() .withTimeout(recoverySettings.internalActionLongTimeout()), EmptyTransportResponseHandler.INSTANCE_SAME) .txGet(); } }); if (request.markAsRelocated()) { // TODO what happens if the recovery process fails afterwards, we need to mark this back to // started try { shard.relocated("to " + request.targetNode()); } catch (IllegalIndexShardStateException e) { // we can ignore this exception since, on the other node, when it moved to phase3 // it will also send shard started, which might cause the index shard we work against // to move be closed by the time we get to the the relocated method } } stopWatch.stop(); logger.trace( "[{}][{}] recovery [phase3] to {}: took [{}]", indexName, shardId, request.targetNode(), stopWatch.totalTime()); response.phase3Time = stopWatch.totalTime().millis(); response.phase3Operations = totalOperations; }
private RecoveryResponse recover(final StartRecoveryRequest request) { final IndexService indexService = indicesService.indexServiceSafe(request.shardId().index().name()); final IndexShard shard = indexService.getShard(request.shardId().id()); // starting recovery from that our (the source) shard state is marking the shard to be in // recovery mode as well, otherwise // the index operations will not be routed to it properly RoutingNode node = clusterService.state().getRoutingNodes().node(request.targetNode().id()); if (node == null) { logger.debug( "delaying recovery of {} as source node {} is unknown", request.shardId(), request.targetNode()); throw new DelayRecoveryException( "source node does not have the node [" + request.targetNode() + "] in its state yet.."); } ShardRouting targetShardRouting = null; for (ShardRouting shardRouting : node) { if (shardRouting.shardId().equals(request.shardId())) { targetShardRouting = shardRouting; break; } } if (targetShardRouting == null) { logger.debug( "delaying recovery of {} as it is not listed as assigned to target node {}", request.shardId(), request.targetNode()); throw new DelayRecoveryException( "source node does not have the shard listed in its state as allocated on the node"); } if (!targetShardRouting.initializing()) { logger.debug( "delaying recovery of {} as it is not listed as initializing on the target node {}. known shards state is [{}]", request.shardId(), request.targetNode(), targetShardRouting.state()); throw new DelayRecoveryException( "source node has the state of the target shard to be [" + targetShardRouting.state() + "], expecting to be [initializing]"); } logger.trace( "[{}][{}] starting recovery to {}, mark_as_relocated {}", request.shardId().index().name(), request.shardId().id(), request.targetNode(), request.markAsRelocated()); final RecoverySourceHandler handler; if (IndexMetaData.isOnSharedFilesystem(shard.indexSettings())) { handler = new SharedFSRecoverySourceHandler( shard, request, recoverySettings, transportService, logger); } else { handler = new RecoverySourceHandler(shard, request, recoverySettings, transportService, logger); } ongoingRecoveries.add(shard, handler); try { return handler.recoverToTarget(); } finally { ongoingRecoveries.remove(shard, handler); } }