Exemplo n.º 1
0
  @Override
  public void fetchFailed(
      String host, InputAttemptIdentifier srcAttemptIdentifier, boolean connectFailed) {
    // TODO NEWTEZ. Implement logic to report fetch failures after a threshold.
    // For now, reporting immediately.
    LOG.info(
        "Fetch failed for src: "
            + srcAttemptIdentifier
            + "InputIdentifier: "
            + srcAttemptIdentifier
            + ", connectFailed: "
            + connectFailed);
    failedShufflesCounter.increment(1);
    if (srcAttemptIdentifier == null) {
      String message = "Received fetchFailure for an unknown src (null)";
      LOG.fatal(message);
      inputContext.fatalError(null, message);
    } else {
      InputReadErrorEvent readError =
          new InputReadErrorEvent(
              "Fetch failure while fetching from "
                  + TezRuntimeUtils.getTaskAttemptIdentifier(
                      inputContext.getSourceVertexName(),
                      srcAttemptIdentifier.getInputIdentifier().getInputIndex(),
                      srcAttemptIdentifier.getAttemptNumber()),
              srcAttemptIdentifier.getInputIdentifier().getInputIndex(),
              srcAttemptIdentifier.getAttemptNumber());

      List<Event> failedEvents = Lists.newArrayListWithCapacity(1);
      failedEvents.add(readError);
      inputContext.sendEvents(failedEvents);
    }
  }
Exemplo n.º 2
0
 public void addKnownInput(
     String hostName,
     int port,
     InputAttemptIdentifier srcAttemptIdentifier,
     int srcPhysicalIndex) {
   String identifier = InputHost.createIdentifier(hostName, port);
   InputHost host = knownSrcHosts.get(identifier);
   if (host == null) {
     host = new InputHost(hostName, port, inputContext.getApplicationId(), srcPhysicalIndex);
     assert identifier.equals(host.getIdentifier());
     InputHost old = knownSrcHosts.putIfAbsent(identifier, host);
     if (old != null) {
       host = old;
     }
   }
   if (LOG.isDebugEnabled()) {
     LOG.debug("Adding input: " + srcAttemptIdentifier + ", to host: " + host);
   }
   host.addKnownInput(srcAttemptIdentifier);
   lock.lock();
   try {
     boolean added = pendingHosts.offer(host);
     if (!added) {
       String errorMessage = "Unable to add host: " + host.getIdentifier() + " to pending queue";
       LOG.error(errorMessage);
       throw new TezUncheckedException(errorMessage);
     }
     wakeLoop.signal();
   } finally {
     lock.unlock();
   }
 }
Exemplo n.º 3
0
 private void registerCompletedInput(FetchedInput fetchedInput) {
   lock.lock();
   try {
     completedInputSet.add(fetchedInput.getInputAttemptIdentifier().getInputIdentifier());
     completedInputs.add(fetchedInput);
     if (!inputReadyNotificationSent.getAndSet(true)) {
       // TODO Should eventually be controlled by Inputs which are processing the data.
       inputContext.inputIsReady();
     }
     int numComplete = numCompletedInputs.incrementAndGet();
     if (numComplete == numInputs) {
       LOG.info("All inputs fetched for input vertex : " + inputContext.getSourceVertexName());
     }
   } finally {
     lock.unlock();
   }
 }
Exemplo n.º 4
0
  private Fetcher constructFetcherForHost(InputHost inputHost) {
    FetcherBuilder fetcherBuilder =
        new FetcherBuilder(
            ShuffleManager.this,
            httpConnectionParams,
            inputManager,
            inputContext.getApplicationId(),
            shuffleSecret,
            srcNameTrimmed);
    if (codec != null) {
      fetcherBuilder.setCompressionParameters(codec);
    }
    fetcherBuilder.setIFileParams(ifileReadAhead, ifileReadAheadLength);

    // Remove obsolete inputs from the list being given to the fetcher. Also
    // remove from the obsolete list.
    List<InputAttemptIdentifier> pendingInputsForHost = inputHost.clearAndGetPendingInputs();
    for (Iterator<InputAttemptIdentifier> inputIter = pendingInputsForHost.iterator();
        inputIter.hasNext(); ) {
      InputAttemptIdentifier input = inputIter.next();
      // Avoid adding attempts which have already completed.
      if (completedInputSet.contains(input.getInputIdentifier())) {
        inputIter.remove();
        continue;
      }
      // Avoid adding attempts which have been marked as OBSOLETE
      if (obsoletedInputs.contains(input)) {
        inputIter.remove();
      }
    }
    // TODO NEWTEZ Maybe limit the number of inputs being given to a single
    // fetcher, especially in the case where #hosts < #fetchers
    fetcherBuilder.assignWork(
        inputHost.getHost(),
        inputHost.getPort(),
        inputHost.getSrcPhysicalIndex(),
        pendingInputsForHost);
    LOG.info(
        "Created Fetcher for host: "
            + inputHost.getHost()
            + ", with inputs: "
            + pendingInputsForHost);
    return fetcherBuilder.build();
  }
Exemplo n.º 5
0
  public ShuffleManager(
      TezInputContext inputContext,
      Configuration conf,
      int numInputs,
      int bufferSize,
      boolean ifileReadAheadEnabled,
      int ifileReadAheadLength,
      CompressionCodec codec,
      FetchedInputAllocator inputAllocator)
      throws IOException {
    this.inputContext = inputContext;
    this.numInputs = numInputs;

    this.shuffledInputsCounter =
        inputContext.getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS);
    this.failedShufflesCounter =
        inputContext.getCounters().findCounter(TaskCounter.NUM_FAILED_SHUFFLE_INPUTS);
    this.bytesShuffledCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES);
    this.decompressedDataSizeCounter =
        inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_DECOMPRESSED);
    this.bytesShuffledToDiskCounter =
        inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_DISK);
    this.bytesShuffledToMemCounter =
        inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_MEM);

    this.ifileBufferSize = bufferSize;
    this.ifileReadAhead = ifileReadAheadEnabled;
    this.ifileReadAheadLength = ifileReadAheadLength;
    this.codec = codec;
    this.inputManager = inputAllocator;

    this.srcNameTrimmed = TezUtils.cleanVertexName(inputContext.getSourceVertexName());

    completedInputSet =
        Collections.newSetFromMap(new ConcurrentHashMap<InputIdentifier, Boolean>(numInputs));
    completedInputs = new LinkedBlockingQueue<FetchedInput>(numInputs);
    knownSrcHosts = new ConcurrentHashMap<String, InputHost>();
    pendingHosts = new LinkedBlockingQueue<InputHost>();
    obsoletedInputs =
        Collections.newSetFromMap(new ConcurrentHashMap<InputAttemptIdentifier, Boolean>());
    runningFetchers = Collections.newSetFromMap(new ConcurrentHashMap<Fetcher, Boolean>());

    int maxConfiguredFetchers =
        conf.getInt(
            TezJobConfig.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES,
            TezJobConfig.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES_DEFAULT);

    this.numFetchers = Math.min(maxConfiguredFetchers, numInputs);

    ExecutorService fetcherRawExecutor =
        Executors.newFixedThreadPool(
            numFetchers,
            new ThreadFactoryBuilder()
                .setDaemon(true)
                .setNameFormat("Fetcher [" + srcNameTrimmed + "] #%d")
                .build());
    this.fetcherExecutor = MoreExecutors.listeningDecorator(fetcherRawExecutor);

    ExecutorService schedulerRawExecutor =
        Executors.newFixedThreadPool(
            1,
            new ThreadFactoryBuilder()
                .setDaemon(true)
                .setNameFormat("ShuffleRunner [" + srcNameTrimmed + "]")
                .build());
    this.schedulerExecutor = MoreExecutors.listeningDecorator(schedulerRawExecutor);

    this.startTime = System.currentTimeMillis();
    this.lastProgressTime = startTime;

    this.shuffleSecret =
        ShuffleUtils.getJobTokenSecretFromTokenBytes(
            inputContext.getServiceConsumerMetaData(
                TezConfiguration.TEZ_SHUFFLE_HANDLER_SERVICE_ID));
    httpConnectionParams = ShuffleUtils.constructHttpShuffleConnectionParams(conf);
    LOG.info(
        this.getClass().getSimpleName()
            + " : numInputs="
            + numInputs
            + ", compressionCodec="
            + (codec == null ? "NoCompressionCodec" : codec.getClass().getName())
            + ", numFetchers="
            + numFetchers
            + ", ifileBufferSize="
            + ifileBufferSize
            + ", ifileReadAheadEnabled="
            + ifileReadAhead
            + ", ifileReadAheadLength="
            + ifileReadAheadLength
            + ", "
            + httpConnectionParams.toString());
  }
  public Fetcher(
      Configuration job,
      ShuffleScheduler scheduler,
      MergeManager merger,
      ShuffleClientMetrics metrics,
      Shuffle shuffle,
      SecretKey jobTokenSecret,
      TezInputContext inputContext)
      throws IOException {
    this.job = job;
    this.scheduler = scheduler;
    this.merger = merger;
    this.metrics = metrics;
    this.shuffle = shuffle;
    this.id = ++nextId;
    this.jobTokenSecret = jobTokenSecret;
    ioErrs =
        inputContext
            .getCounters()
            .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.IO_ERROR.toString());
    wrongLengthErrs =
        inputContext
            .getCounters()
            .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_LENGTH.toString());
    badIdErrs =
        inputContext
            .getCounters()
            .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.BAD_ID.toString());
    wrongMapErrs =
        inputContext
            .getCounters()
            .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_MAP.toString());
    connectionErrs =
        inputContext
            .getCounters()
            .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.CONNECTION.toString());
    wrongReduceErrs =
        inputContext
            .getCounters()
            .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_REDUCE.toString());

    if (ConfigUtils.isIntermediateInputCompressed(job)) {
      Class<? extends CompressionCodec> codecClass =
          ConfigUtils.getIntermediateInputCompressorClass(job, DefaultCodec.class);
      codec = ReflectionUtils.newInstance(codecClass, job);
      decompressor = CodecPool.getDecompressor(codec);
    } else {
      codec = null;
      decompressor = null;
    }

    this.connectionTimeout =
        job.getInt(
            TezJobConfig.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_STALLED_COPY_TIMEOUT);
    this.readTimeout =
        job.getInt(
            TezJobConfig.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT);

    setName("fetcher#" + id);
    setDaemon(true);

    synchronized (Fetcher.class) {
      sslShuffle =
          job.getBoolean(
              TezJobConfig.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL,
              TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_ENABLE_SSL);
      if (sslShuffle && sslFactory == null) {
        sslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, job);
        try {
          sslFactory.init();
        } catch (Exception ex) {
          sslFactory.destroy();
          throw new RuntimeException(ex);
        }
      }
    }
  }