예제 #1
0
  /**
   * Pings the other nodes and derives a master from the answers.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Collect ping responses via {@code pingService.pingAndWait(pingTimeout)}; a {@code null}
   *       result ends the attempt.
   *   <li>Drop responses whose target node is excluded by {@code masterElectionFilterClientNodes}
   *       or {@code masterElectionFilterDataNodes}.
   *   <li>Gather the masters reported by the remaining responses, and the set of candidate nodes
   *       (the local node plus every remaining response target).
   *   <li>Give up if {@code electMaster.hasEnoughMasterNodes} rejects the candidate set.
   *   <li>If nobody reported a master, elect among the candidates and accept the result only when
   *       it is the local node; otherwise elect among the reported masters.
   * </ol>
   *
   * @return the chosen master node, or {@code null} when no master could be determined
   */
  private DiscoveryNode findMaster() {
    final ZenPing.PingResponse[] allResponses = pingService.pingAndWait(pingTimeout);
    if (allResponses == null) {
      logger.trace("No full ping responses");
      return null;
    }

    if (logger.isTraceEnabled()) {
      final StringBuilder traceMessage = new StringBuilder("full ping responses:");
      if (allResponses.length == 0) {
        traceMessage.append(" {none}");
      }
      // the loop body simply never runs for an empty array
      for (ZenPing.PingResponse response : allResponses) {
        traceMessage.append("\n\t--> ");
        traceMessage.append("target [").append(response.target());
        traceMessage.append("], master [").append(response.master());
        traceMessage.append("]");
      }
      logger.trace(traceMessage.toString());
    }

    // keep only the responses coming from nodes that may take part in the election
    final List<ZenPing.PingResponse> eligibleResponses = Lists.newArrayList();
    for (ZenPing.PingResponse response : allResponses) {
      final DiscoveryNode target = response.target();
      if (masterElectionFilterClientNodes
          && (target.clientNode() || !(target.masterNode() || target.dataNode()))) {
        // explicit client node, or a node that is neither master nor data (effectively a client)
        continue;
      }
      if (masterElectionFilterDataNodes && !target.masterNode() && target.dataNode()) {
        // data node that is not also a master node
        continue;
      }
      eligibleResponses.add(response);
    }

    if (logger.isDebugEnabled()) {
      final StringBuilder debugMessage = new StringBuilder("filtered ping responses: (filter_client[");
      debugMessage.append(masterElectionFilterClientNodes);
      debugMessage.append("], filter_data[");
      debugMessage.append(masterElectionFilterDataNodes);
      debugMessage.append("])");
      if (eligibleResponses.isEmpty()) {
        debugMessage.append(" {none}");
      }
      for (ZenPing.PingResponse response : eligibleResponses) {
        debugMessage.append("\n\t--> ");
        debugMessage.append("target [").append(response.target());
        debugMessage.append("], master [").append(response.master());
        debugMessage.append("]");
      }
      logger.debug(debugMessage.toString());
    }

    // one pass collects both the masters other nodes report and the nodes we can see
    final List<DiscoveryNode> reportedMasters = newArrayList();
    final Set<DiscoveryNode> candidates = Sets.newHashSet();
    candidates.add(localNode);
    for (ZenPing.PingResponse response : eligibleResponses) {
      final DiscoveryNode reported = response.master();
      if (reported != null) {
        reportedMasters.add(reported);
      }
      candidates.add(response.target());
    }

    // without enough visible master nodes we bail out, even if some response already names a
    // master
    if (!electMaster.hasEnoughMasterNodes(candidates)) {
      return null;
    }

    if (reportedMasters.isEmpty()) {
      // nobody knows a master yet: tie break between the discovered nodes, and only claim
      // mastership when the election picks the local node
      final DiscoveryNode winner = electMaster.electMaster(candidates);
      return localNode.equals(winner) ? localNode : null;
    }

    // may be null, which the caller sees as "no master found"
    return electMaster.electMaster(reportedMasters);
  }
  /**
   * Handles one inbound transport message.
   *
   * <p>Anything that is not a {@link ChannelBuffer} is forwarded upstream untouched. Otherwise the
   * message size is read from the four bytes preceding the reader index, the header fields
   * (request id, status byte, version id) are consumed, and the remaining payload is handed to
   * either the request path or the response path depending on the status byte.
   *
   * <p>After dispatch the reader index is compared with the position the message should end at; if
   * they differ a warning is logged and the index is forced to the expected position.
   *
   * @param ctx the channel handler context the event arrived on
   * @param e the message event; only {@link ChannelBuffer} payloads are processed here
   * @throws ElasticsearchIllegalStateException if the status byte marks the payload as compressed
   *     but no compressor can be determined from the buffer
   * @throws Exception if reading or dispatching the message fails
   */
  @Override
  public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) throws Exception {
    Object m = e.getMessage();
    if (!(m instanceof ChannelBuffer)) {
      ctx.sendUpstream(e);
      return;
    }
    ChannelBuffer buffer = (ChannelBuffer) m;
    // the size field sits directly before the current reader index
    int size = buffer.getInt(buffer.readerIndex() - 4);
    // NOTE(review): the extra 6 presumably accounts for the framing bytes that precede the
    // payload - confirm against the frame decoder
    transportServiceAdapter.received(size + 6);

    // we have additional bytes to read, outside of the header
    boolean hasMessageBytesToRead = (size - (NettyHeader.HEADER_SIZE - 6)) != 0;

    int markedReaderIndex = buffer.readerIndex();
    int expectedIndexReader = markedReaderIndex + size;

    // netty always copies a buffer, either in NioWorker in its read handler, where it copies to a
    // fresh buffer, or in the cumulation buffer, which is cleaned each time
    StreamInput streamIn = ChannelBufferStreamInputFactory.create(buffer, size);

    long requestId = buffer.readLong();
    byte status = buffer.readByte();
    Version version = Version.fromId(buffer.readInt());

    StreamInput wrappedStream;
    if (TransportStatus.isCompress(status) && hasMessageBytesToRead && buffer.readable()) {
      Compressor compressor = CompressorFactory.compressor(buffer);
      if (compressor == null) {
        // include the first few payload bytes in the error to help diagnose what was received
        int maxToRead = Math.min(buffer.readableBytes(), 10);
        int offset = buffer.readerIndex();
        StringBuilder sb =
            new StringBuilder("stream marked as compressed, but no compressor found, first [")
                .append(maxToRead)
                .append("] content bytes out of [")
                .append(buffer.readableBytes())
                .append("] readable bytes with message size [")
                .append(size)
                .append("] are [");
        for (int i = 0; i < maxToRead; i++) {
          sb.append(buffer.getByte(offset + i)).append(",");
        }
        sb.append("]");
        throw new ElasticsearchIllegalStateException(sb.toString());
      }
      wrappedStream = CachedStreamInput.cachedHandlesCompressed(compressor, streamIn);
    } else {
      wrappedStream = CachedStreamInput.cachedHandles(streamIn);
    }

    // close the stream even when dispatching throws, so it is never left open on a failure path
    try {
      wrappedStream.setVersion(version);

      if (TransportStatus.isRequest(status)) {
        String action = handleRequest(ctx.getChannel(), wrappedStream, requestId, version);
        if (buffer.readerIndex() != expectedIndexReader) {
          if (buffer.readerIndex() < expectedIndexReader) {
            logger.warn(
                "Message not fully read (request) for [{}] and action [{}], resetting",
                requestId,
                action);
          } else {
            logger.warn(
                "Message read past expected size (request) for [{}] and action [{}], resetting",
                requestId,
                action);
          }
          buffer.readerIndex(expectedIndexReader);
        }
      } else {
        TransportResponseHandler handler = transportServiceAdapter.remove(requestId);
        // ignore if its null, the adapter logs it
        if (handler != null) {
          if (TransportStatus.isError(status)) {
            handlerResponseError(wrappedStream, handler);
          } else {
            handleResponse(ctx.getChannel(), wrappedStream, handler);
          }
        } else {
          // no handler to consume the payload: skip straight to the end of the message
          buffer.readerIndex(expectedIndexReader);
        }
        if (buffer.readerIndex() != expectedIndexReader) {
          if (buffer.readerIndex() < expectedIndexReader) {
            logger.warn(
                "Message not fully read (response) for [{}] handler {}, error [{}], resetting",
                requestId,
                handler,
                TransportStatus.isError(status));
          } else {
            logger.warn(
                "Message read past expected size (response) for [{}] handler {}, error [{}], resetting",
                requestId,
                handler,
                TransportStatus.isError(status));
          }
          buffer.readerIndex(expectedIndexReader);
        }
      }
    } finally {
      wrappedStream.close();
    }
  }
예제 #3
0
  /**
   * Runs one recovery attempt for the shard described by {@code recoveryStatus}.
   *
   * <p>The method snapshots the local store metadata, sends a {@link StartRecoveryRequest} to the
   * source node and blocks (through the status' cancellable threads) until the response arrives.
   * On success the recovery is marked as done and a summary is logged. On failure the cause is
   * unwrapped and the attempt is either retried, via {@code retryRecovery}, or failed, via
   * {@code onGoingRecoveries.failRecovery}, depending on the exception type.
   *
   * <p>NOTE(review): the final boolean passed to {@code failRecovery} is {@code false} when the
   * source shard is closed and {@code true} otherwise; presumably it controls whether a shard
   * failure is reported - confirm against {@code failRecovery}.
   *
   * @param recoveryStatus state of the recovery to run; its source node must not be {@code null}
   */
  private void doRecovery(final RecoveryStatus recoveryStatus) {
    assert recoveryStatus.sourceNode() != null : "can't do a recovery without a source node";

    logger.trace("collecting local files for {}", recoveryStatus);
    Store.MetadataSnapshot metadataSnapshot;
    try {
      metadataSnapshot = recoveryStatus.store().getMetadataOrEmpty();
    } catch (IOException e) {
      logger.warn("error while listing local files, recover as if there are none", e);
      metadataSnapshot = Store.MetadataSnapshot.EMPTY;
    } catch (Exception e) {
      // this will be logged as warning later on...
      logger.trace("unexpected error while listing local files, failing recovery", e);
      onGoingRecoveries.failRecovery(
          recoveryStatus.recoveryId(),
          new RecoveryFailedException(recoveryStatus.state(), "failed to list local files", e),
          true);
      return;
    }
    final StartRecoveryRequest request =
        new StartRecoveryRequest(
            recoveryStatus.shardId(),
            recoveryStatus.sourceNode(),
            clusterService.localNode(),
            false,
            metadataSnapshot,
            recoveryStatus.state().getType(),
            recoveryStatus.recoveryId());

    // filled in from inside the cancellable task below
    final AtomicReference<RecoveryResponse> responseHolder = new AtomicReference<>();
    try {
      logger.trace(
          "[{}][{}] starting recovery from {}",
          request.shardId().index().name(),
          request.shardId().id(),
          request.sourceNode());
      recoveryStatus.indexShard().prepareForIndexRecovery();
      recoveryStatus
          .CancellableThreads()
          .execute(
              new CancellableThreads.Interruptable() {
                @Override
                public void run() throws InterruptedException {
                  responseHolder.set(
                      transportService
                          .submitRequest(
                              request.sourceNode(),
                              RecoverySource.Actions.START_RECOVERY,
                              request,
                              new FutureTransportResponseHandler<RecoveryResponse>() {
                                @Override
                                public RecoveryResponse newInstance() {
                                  return new RecoveryResponse();
                                }
                              })
                          .txGet());
                }
              });
      final RecoveryResponse recoveryResponse = responseHolder.get();
      // check the value that is dereferenced below; the holder itself can never be null
      assert recoveryResponse != null;
      final TimeValue recoveryTime = new TimeValue(recoveryStatus.state().getTimer().time());
      // do this through ongoing recoveries to remove it from the collection
      onGoingRecoveries.markRecoveryAsDone(recoveryStatus.recoveryId());
      if (logger.isTraceEnabled()) {
        StringBuilder sb = new StringBuilder();
        sb.append('[')
            .append(request.shardId().index().name())
            .append(']')
            .append('[')
            .append(request.shardId().id())
            .append("] ");
        sb.append("recovery completed from ")
            .append(request.sourceNode())
            .append(", took[")
            .append(recoveryTime)
            .append("]\n");
        sb.append("   phase1: recovered_files [")
            .append(recoveryResponse.phase1FileNames.size())
            .append("]")
            .append(" with total_size of [")
            .append(new ByteSizeValue(recoveryResponse.phase1TotalSize))
            .append("]")
            .append(", took [")
            .append(timeValueMillis(recoveryResponse.phase1Time))
            .append("], throttling_wait [")
            .append(timeValueMillis(recoveryResponse.phase1ThrottlingWaitTime))
            .append(']')
            .append("\n");
        sb.append("         : reusing_files   [")
            .append(recoveryResponse.phase1ExistingFileNames.size())
            .append("] with total_size of [")
            .append(new ByteSizeValue(recoveryResponse.phase1ExistingTotalSize))
            .append("]\n");
        sb.append("   phase2: start took [")
            .append(timeValueMillis(recoveryResponse.startTime))
            .append("]\n");
        sb.append("         : recovered [")
            .append(recoveryResponse.phase2Operations)
            .append("]")
            .append(" transaction log operations")
            .append(", took [")
            .append(timeValueMillis(recoveryResponse.phase2Time))
            .append("]")
            .append("\n");
        logger.trace(sb.toString());
      } else {
        logger.debug(
            "{} recovery done from [{}], took [{}]",
            request.shardId(),
            recoveryStatus.sourceNode(),
            recoveryTime);
      }
    } catch (CancellableThreads.ExecutionCancelledException e) {
      logger.trace("recovery cancelled", e);
    } catch (Throwable e) {

      if (logger.isTraceEnabled()) {
        logger.trace(
            "[{}][{}] Got exception on recovery",
            e,
            request.shardId().index().name(),
            request.shardId().id());
      }
      Throwable cause = ExceptionsHelper.unwrapCause(e);
      if (cause instanceof RecoveryEngineException) {
        // unwrap an exception that was thrown as part of the recovery
        cause = cause.getCause();
      }
      // do it twice, in case we have double transport exception
      cause = ExceptionsHelper.unwrapCause(cause);
      if (cause instanceof RecoveryEngineException) {
        // unwrap an exception that was thrown as part of the recovery
        cause = cause.getCause();
      }

      // here, we would add checks against exception that need to be retried (and not removeAndClean
      // in this case)

      if (cause instanceof IllegalIndexShardStateException
          || cause instanceof IndexNotFoundException
          || cause instanceof ShardNotFoundException) {
        // if the target is not ready yet, retry
        retryRecovery(
            recoveryStatus,
            "remote shard not ready",
            recoverySettings.retryDelayStateSync(),
            request);
        return;
      }

      if (cause instanceof DelayRecoveryException) {
        retryRecovery(recoveryStatus, cause, recoverySettings.retryDelayStateSync(), request);
        return;
      }

      if (cause instanceof ConnectTransportException) {
        logger.debug(
            "delaying recovery of {} for [{}] due to networking error [{}]",
            recoveryStatus.shardId(),
            recoverySettings.retryDelayNetwork(),
            cause.getMessage());
        retryRecovery(
            recoveryStatus, cause.getMessage(), recoverySettings.retryDelayNetwork(), request);
        return;
      }

      // both exception types are reported the same way: the source shard is closed
      if (cause instanceof IndexShardClosedException || cause instanceof AlreadyClosedException) {
        onGoingRecoveries.failRecovery(
            recoveryStatus.recoveryId(),
            new RecoveryFailedException(request, "source shard is closed", cause),
            false);
        return;
      }

      // anything else fails the recovery, wrapping the original (not unwrapped) exception
      onGoingRecoveries.failRecovery(
          recoveryStatus.recoveryId(), new RecoveryFailedException(request, e), true);
    }
  }