/**
 * Verifies that a {@code TranslogRecoveryPerformer.BatchOperationException} keeps its shard id,
 * completed-operations count, message and cause after being passed through the {@code serialize}
 * helper.
 *
 * <p>Two variants are checked: one with a shard id and no cause, and one with a {@code null}
 * shard id, a negative operation count and a {@link NullPointerException} cause.
 *
 * @throws IOException if thrown by the {@code serialize} helper
 */
public void testBatchOperationException() throws IOException {
  final String message = "batched the f****r";
  final ShardId id = new ShardId("foo", 1);

  // Variant 1: shard id present, no cause.
  TranslogRecoveryPerformer.BatchOperationException ex =
      serialize(new TranslogRecoveryPerformer.BatchOperationException(id, message, 666, null));
  // Expected value goes first so that a failure message reports expected/actual correctly.
  assertEquals(id, ex.shardId());
  assertEquals(666, ex.completedOperations());
  assertEquals(message, ex.getMessage());
  assertNull(ex.getCause());

  // Variant 2: no shard id, negative count, non-null cause.
  ex =
      serialize(
          new TranslogRecoveryPerformer.BatchOperationException(
              null, message, -1, new NullPointerException()));
  assertNull(ex.shardId());
  assertEquals(-1, ex.completedOperations());
  assertEquals(message, ex.getMessage());
  assertTrue(ex.getCause() instanceof NullPointerException);
}
@Override public void messageReceived( final RecoveryTranslogOperationsRequest request, final TransportChannel channel) throws IOException { try (RecoveriesCollection.RecoveryRef recoveryRef = onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) { final ClusterStateObserver observer = new ClusterStateObserver(clusterService, null, logger, threadPool.getThreadContext()); final RecoveryTarget recoveryTarget = recoveryRef.status(); try { recoveryTarget.indexTranslogOperations(request.operations(), request.totalTranslogOps()); channel.sendResponse(TransportResponse.Empty.INSTANCE); } catch (TranslogRecoveryPerformer.BatchOperationException exception) { MapperException mapperException = (MapperException) ExceptionsHelper.unwrap(exception, MapperException.class); if (mapperException == null) { throw exception; } // in very rare cases a translog replay from primary is processed before a mapping update // on this node // which causes local mapping changes since the mapping (clusterstate) might not have // arrived on this node. // we want to wait until these mappings are processed but also need to do some maintenance // and roll back the // number of processed (completed) operations in this batch to ensure accounting is // correct. 
logger.trace( (Supplier<?>) () -> new ParameterizedMessage( "delaying recovery due to missing mapping changes (rolling back stats for [{}] ops)", exception.completedOperations()), exception); final RecoveryState.Translog translog = recoveryTarget.state().getTranslog(); translog.decrementRecoveredOperations( exception.completedOperations()); // do the maintainance and rollback competed ops // we do not need to use a timeout here since the entire recovery mechanism has an // inactivity protection (it will be // canceled) observer.waitForNextChange( new ClusterStateObserver.Listener() { @Override public void onNewClusterState(ClusterState state) { try { messageReceived(request, channel); } catch (Exception e) { onFailure(e); } } protected void onFailure(Exception e) { try { channel.sendResponse(e); } catch (IOException e1) { logger.warn("failed to send error back to recovery source", e1); } } @Override public void onClusterServiceClose() { onFailure( new ElasticsearchException( "cluster service was closed while waiting for mapping updates")); } @Override public void onTimeout(TimeValue timeout) { // note that we do not use a timeout (see comment above) onFailure( new ElasticsearchTimeoutException( "timed out waiting for mapping updates (timeout [" + timeout + "])")); } }); } } }
@Override public void messageReceived( final RecoveryTranslogOperationsRequest request, final TransportChannel channel) throws Exception { try (RecoveriesCollection.StatusRef statusRef = onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) { final ClusterStateObserver observer = new ClusterStateObserver(clusterService, null, logger); final RecoveryStatus recoveryStatus = statusRef.status(); final RecoveryState.Translog translog = recoveryStatus.state().getTranslog(); translog.totalOperations(request.totalTranslogOps()); assert recoveryStatus.indexShard().recoveryState() == recoveryStatus.state(); try { recoveryStatus.indexShard().performBatchRecovery(request.operations()); channel.sendResponse(TransportResponse.Empty.INSTANCE); } catch (TranslogRecoveryPerformer.BatchOperationException exception) { MapperException mapperException = (MapperException) ExceptionsHelper.unwrap(exception, MapperException.class); if (mapperException == null) { throw exception; } // in very rare cases a translog replay from primary is processed before a mapping update // on this node // which causes local mapping changes. we want to wait until these mappings are processed. 
logger.trace( "delaying recovery due to missing mapping changes (rolling back stats for [{}] ops)", exception, exception.completedOperations()); translog.decrementRecoveredOperations(exception.completedOperations()); // we do not need to use a timeout here since the entire recovery mechanism has an // inactivity protection (it will be // canceled) observer.waitForNextChange( new ClusterStateObserver.Listener() { @Override public void onNewClusterState(ClusterState state) { try { messageReceived(request, channel); } catch (Exception e) { onFailure(e); } } protected void onFailure(Exception e) { try { channel.sendResponse(e); } catch (IOException e1) { logger.warn("failed to send error back to recovery source", e1); } } @Override public void onClusterServiceClose() { onFailure( new ElasticsearchException( "cluster service was closed while waiting for mapping updates")); } @Override public void onTimeout(TimeValue timeout) { // note that we do not use a timeout (see comment above) onFailure( new ElasticsearchTimeoutException( "timed out waiting for mapping updates (timeout [" + timeout + "])")); } }); } } }