public void testBatchOperationException() throws IOException {
    ShardId id = new ShardId("foo", 1);
    TranslogRecoveryPerformer.BatchOperationException ex =
        serialize(
            new TranslogRecoveryPerformer.BatchOperationException(
                id, "batched the f****r", 666, null));
    assertEquals(ex.shardId(), id);
    assertEquals(666, ex.completedOperations());
    assertEquals("batched the f****r", ex.getMessage());
    assertNull(ex.getCause());

    ex =
        serialize(
            new TranslogRecoveryPerformer.BatchOperationException(
                null, "batched the f****r", -1, new NullPointerException()));
    assertNull(ex.shardId());
    assertEquals(-1, ex.completedOperations());
    assertEquals("batched the f****r", ex.getMessage());
    assertTrue(ex.getCause() instanceof NullPointerException);
  }
    @Override
    public void messageReceived(
        final RecoveryTranslogOperationsRequest request, final TransportChannel channel)
        throws IOException {
      try (RecoveriesCollection.RecoveryRef recoveryRef =
          onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) {
        final ClusterStateObserver observer =
            new ClusterStateObserver(clusterService, null, logger, threadPool.getThreadContext());
        final RecoveryTarget recoveryTarget = recoveryRef.status();
        try {
          recoveryTarget.indexTranslogOperations(request.operations(), request.totalTranslogOps());
          channel.sendResponse(TransportResponse.Empty.INSTANCE);
        } catch (TranslogRecoveryPerformer.BatchOperationException exception) {
          MapperException mapperException =
              (MapperException) ExceptionsHelper.unwrap(exception, MapperException.class);
          if (mapperException == null) {
            throw exception;
          }
          // in very rare cases a translog replay from primary is processed before a mapping update
          // on this node
          // which causes local mapping changes since the mapping (clusterstate) might not have
          // arrived on this node.
          // we want to wait until these mappings are processed but also need to do some maintenance
          // and roll back the
          // number of processed (completed) operations in this batch to ensure accounting is
          // correct.
          logger.trace(
              (Supplier<?>)
                  () ->
                      new ParameterizedMessage(
                          "delaying recovery due to missing mapping changes (rolling back stats for [{}] ops)",
                          exception.completedOperations()),
              exception);
          final RecoveryState.Translog translog = recoveryTarget.state().getTranslog();
          translog.decrementRecoveredOperations(
              exception.completedOperations()); // do the maintainance and rollback competed ops
          // we do not need to use a timeout here since the entire recovery mechanism has an
          // inactivity protection (it will be
          // canceled)
          observer.waitForNextChange(
              new ClusterStateObserver.Listener() {
                @Override
                public void onNewClusterState(ClusterState state) {
                  try {
                    messageReceived(request, channel);
                  } catch (Exception e) {
                    onFailure(e);
                  }
                }

                protected void onFailure(Exception e) {
                  try {
                    channel.sendResponse(e);
                  } catch (IOException e1) {
                    logger.warn("failed to send error back to recovery source", e1);
                  }
                }

                @Override
                public void onClusterServiceClose() {
                  onFailure(
                      new ElasticsearchException(
                          "cluster service was closed while waiting for mapping updates"));
                }

                @Override
                public void onTimeout(TimeValue timeout) {
                  // note that we do not use a timeout (see comment above)
                  onFailure(
                      new ElasticsearchTimeoutException(
                          "timed out waiting for mapping updates (timeout [" + timeout + "])"));
                }
              });
        }
      }
    }
Пример #3
0
    @Override
    public void messageReceived(
        final RecoveryTranslogOperationsRequest request, final TransportChannel channel)
        throws Exception {
      try (RecoveriesCollection.StatusRef statusRef =
          onGoingRecoveries.getStatusSafe(request.recoveryId(), request.shardId())) {
        final ClusterStateObserver observer =
            new ClusterStateObserver(clusterService, null, logger);
        final RecoveryStatus recoveryStatus = statusRef.status();
        final RecoveryState.Translog translog = recoveryStatus.state().getTranslog();
        translog.totalOperations(request.totalTranslogOps());
        assert recoveryStatus.indexShard().recoveryState() == recoveryStatus.state();
        try {
          recoveryStatus.indexShard().performBatchRecovery(request.operations());
          channel.sendResponse(TransportResponse.Empty.INSTANCE);
        } catch (TranslogRecoveryPerformer.BatchOperationException exception) {
          MapperException mapperException =
              (MapperException) ExceptionsHelper.unwrap(exception, MapperException.class);
          if (mapperException == null) {
            throw exception;
          }
          // in very rare cases a translog replay from primary is processed before a mapping update
          // on this node
          // which causes local mapping changes. we want to wait until these mappings are processed.
          logger.trace(
              "delaying recovery due to missing mapping changes (rolling back stats for [{}] ops)",
              exception,
              exception.completedOperations());
          translog.decrementRecoveredOperations(exception.completedOperations());
          // we do not need to use a timeout here since the entire recovery mechanism has an
          // inactivity protection (it will be
          // canceled)
          observer.waitForNextChange(
              new ClusterStateObserver.Listener() {
                @Override
                public void onNewClusterState(ClusterState state) {
                  try {
                    messageReceived(request, channel);
                  } catch (Exception e) {
                    onFailure(e);
                  }
                }

                protected void onFailure(Exception e) {
                  try {
                    channel.sendResponse(e);
                  } catch (IOException e1) {
                    logger.warn("failed to send error back to recovery source", e1);
                  }
                }

                @Override
                public void onClusterServiceClose() {
                  onFailure(
                      new ElasticsearchException(
                          "cluster service was closed while waiting for mapping updates"));
                }

                @Override
                public void onTimeout(TimeValue timeout) {
                  // note that we do not use a timeout (see comment above)
                  onFailure(
                      new ElasticsearchTimeoutException(
                          "timed out waiting for mapping updates (timeout [" + timeout + "])"));
                }
              });
        }
      }
    }