/**
 * Main fetch loop: repeatedly waits for any in-memory merge to finish, obtains a host from the
 * scheduler, and copies from it, until {@code stopped} is set or the thread is interrupted.
 *
 * <p>A host obtained from the scheduler is always handed back through
 * {@code scheduler.freeHost} (followed by {@code metrics.threadFree}), even when the copy fails.
 * An {@link InterruptedException} ends the loop quietly; any other {@link Throwable} is passed
 * to {@code shuffle.reportException}.
 */
public void run() {
    try {
      while (!stopped && !Thread.currentThread().isInterrupted()) {
        MapHost host = null;
        try {
          // If merge is on, block
          merger.waitForInMemoryMerge();

          // Get a host to shuffle from
          host = scheduler.getHost();
          metrics.threadBusy();

          // Shuffle
          copyFromHost(host);
        } finally {
          // host stays null when we failed before a host was assigned; nothing to release then
          if (host != null) {
            scheduler.freeHost(host);
            metrics.threadFree();
          }
        }
      }
    } catch (InterruptedException ie) {
      // Throwing InterruptedException clears the interrupt status; restore it so that whoever
      // owns this thread can still observe that an interrupt was requested.
      Thread.currentThread().interrupt();
    } catch (Throwable t) {
      shuffle.reportException(t);
    }
  }
  /**
   * Streams {@code compressedLength} bytes of a map output from {@code input} into the stream
   * returned by {@code mapOutput.getDisk()}.
   *
   * <p>Data is moved in chunks of at most 64 KiB, and every chunk written is reported through
   * {@code metrics.inputBytes}. If an {@link IOException} occurs while copying, both streams are
   * handed to {@code IOUtils.cleanup} and the exception is rethrown.
   *
   * @param host the host being fetched from; only used in the incomplete-output error message
   * @param mapOutput destination descriptor supplying the disk stream and the attempt identifier
   * @param input stream from which the map output bytes are read
   * @param compressedLength number of bytes expected from {@code input}
   * @throws IOException if the stream ends early, a read/write/close fails, or the number of
   *     bytes copied does not add up to {@code compressedLength}
   */
  private void shuffleToDisk(
      MapHost host, MapOutput mapOutput, InputStream input, long compressedLength)
      throws IOException {
    final OutputStream diskSink = mapOutput.getDisk();
    long remaining = compressedLength;
    try {
      final int chunkSize = 64 * 1024;
      final byte[] chunk = new byte[chunkSize];
      while (remaining > 0) {
        // Never ask for more than what is still outstanding for this map output.
        final int wanted = (int) Math.min(remaining, chunkSize);
        final int got = input.read(chunk, 0, wanted);
        if (got < 0) {
          throw new IOException(
              "read past end of stream reading " + mapOutput.getAttemptIdentifier());
        }
        diskSink.write(chunk, 0, got);
        remaining -= got;
        metrics.inputBytes(got);
      }

      final long copied = compressedLength - remaining;
      LOG.info("Read " + copied + " bytes from map-output for " + mapOutput.getAttemptIdentifier());

      diskSink.close();
    } catch (IOException ioe) {
      // Release both ends before propagating the failure.
      IOUtils.cleanup(LOG, input, diskSink);
      throw ioe;
    }

    // Final consistency check on the byte count.
    if (remaining == 0) {
      return;
    }
    throw new IOException(
        "Incomplete map output received for "
            + mapOutput.getAttemptIdentifier()
            + " from "
            + host.getHostName()
            + " ("
            + remaining
            + " bytes missing of "
            + compressedLength
            + ")");
  }
  /**
   * Fills the in-memory buffer of {@code mapOutput} with data read from {@code input}.
   *
   * <p>The raw stream is first wrapped in an {@code IFileInputStream}; when a codec is
   * configured, the decompressor is reset and a codec stream is layered on top. The buffer
   * returned by {@code mapOutput.getMemory()} is then filled completely and its size is reported
   * through {@code metrics.inputBytes}. On {@link IOException} the wrapped stream is passed to
   * {@code IOUtils.cleanup} and the exception is rethrown.
   *
   * @param host the host being fetched from (not used by this method)
   * @param mapOutput destination descriptor supplying the memory buffer and attempt identifier
   * @param input stream from which the map output bytes are read
   * @param decompressedLength declared decompressed size (not used by this method; the buffer
   *     length determines how much is read)
   * @param compressedLength length handed to the {@code IFileInputStream} wrapper
   * @throws IOException if the buffer cannot be filled completely
   */
  private void shuffleToMemory(
      MapHost host,
      MapOutput mapOutput,
      InputStream input,
      int decompressedLength,
      int compressedLength)
      throws IOException {
    // Build the read pipeline: IFile wrapper first, optional decompression on top.
    InputStream source = new IFileInputStream(input, compressedLength, job);
    if (codec != null) {
      decompressor.reset();
      source = codec.createInputStream(source, decompressor);
    }

    final byte[] buffer = mapOutput.getMemory();

    try {
      IOUtils.readFully(source, buffer, 0, buffer.length);
      metrics.inputBytes(buffer.length);
      LOG.info(
          "Read " + buffer.length + " bytes from map-output for " + mapOutput.getAttemptIdentifier());
    } catch (IOException ioe) {
      // Release the (possibly wrapped) stream before propagating the failure.
      IOUtils.cleanup(LOG, source);
      throw ioe;
    }
  }
  /**
   * Reads one shuffle header from {@code input} and copies the corresponding map output either
   * into memory or onto disk, depending on what {@code merger.reserve} hands back.
   *
   * @param host the host the data is being fetched from
   * @param input stream positioned at the next shuffle header
   * @param remaining attempt identifiers still outstanding for this host; the copied attempt is
   *     removed from this set on success
   * @return {@code null} when the output was copied successfully; {@code EMPTY_ATTEMPT_ID_ARRAY}
   *     when the merger returned {@code Type.WAIT}; otherwise the attempt identifiers that are
   *     considered bad (all of {@code remaining} when the header could not be attributed to a
   *     single attempt)
   */
  private InputAttemptIdentifier[] copyMapOutput(
      MapHost host, DataInputStream input, Set<InputAttemptIdentifier> remaining) {
    MapOutput mapOutput = null;
    InputAttemptIdentifier srcAttemptId = null;
    long decompressedLength = -1;
    long compressedLength = -1;

    try {
      long startTime = System.currentTimeMillis();
      int forReduce = -1;
      // Read the shuffle header
      try {
        ShuffleHeader header = new ShuffleHeader();
        header.readFields(input);
        srcAttemptId = scheduler.getIdentifierForFetchedOutput(header.mapId, header.forReduce);
        compressedLength = header.compressedLength;
        decompressedLength = header.uncompressedLength;
        forReduce = header.forReduce;
      } catch (IllegalArgumentException e) {
        badIdErrs.increment(1);
        LOG.warn("Invalid map id ", e);
        // Don't know which one was bad, so consider all of them as bad
        return remaining.toArray(new InputAttemptIdentifier[remaining.size()]);
      }

      // Do some basic sanity verification
      if (!verifySanity(compressedLength, decompressedLength, forReduce, remaining, srcAttemptId)) {
        return new InputAttemptIdentifier[] {srcAttemptId};
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug(
            "header: "
                + srcAttemptId
                + ", len: "
                + compressedLength
                + ", decomp len: "
                + decompressedLength);
      }

      // Get the location for the map output - either in-memory or on-disk
      mapOutput = merger.reserve(srcAttemptId, decompressedLength, id);

      // Check if we can shuffle *now* ...
      if (mapOutput.getType() == Type.WAIT) {
        LOG.info("fetcher#" + id + " - MergerManager returned Status.WAIT ...");
        // Not an error but wait to process data.
        return EMPTY_ATTEMPT_ID_ARRAY;
      }

      // Go!
      LOG.info(
          "fetcher#"
              + id
              + " about to shuffle output of map "
              + mapOutput.getAttemptIdentifier()
              + " decomp: "
              + decompressedLength
              + " len: "
              + compressedLength
              + " to "
              + mapOutput.getType());

      // Some compression codecs signal decompression failures with java.lang.InternalError
      // rather than IOException. Convert it so the fetch-failure handling below (abort the
      // output, count the failed fetch, report the attempt) also covers that case.
      try {
        if (mapOutput.getType() == Type.MEMORY) {
          shuffleToMemory(host, mapOutput, input, (int) decompressedLength, (int) compressedLength);
        } else {
          shuffleToDisk(host, mapOutput, input, compressedLength);
        }
      } catch (java.lang.InternalError e) {
        LOG.warn("Failed to shuffle for fetcher#" + id, e);
        throw new IOException(e);
      }

      // Inform the shuffle scheduler
      long endTime = System.currentTimeMillis();
      scheduler.copySucceeded(srcAttemptId, host, compressedLength, endTime - startTime, mapOutput);
      // Note successful shuffle
      remaining.remove(srcAttemptId);
      metrics.successFetch();
      // null (as opposed to an empty array) is the success signal for the caller
      return null;
    } catch (IOException ioe) {
      ioErrs.increment(1);
      // Failure before a destination was reserved: nothing to abort, just report the ids.
      if (srcAttemptId == null || mapOutput == null) {
        LOG.info(
            "fetcher#"
                + id
                + " failed to read map header"
                + srcAttemptId
                + " decomp: "
                + decompressedLength
                + ", "
                + compressedLength,
            ioe);
        if (srcAttemptId == null) {
          return remaining.toArray(new InputAttemptIdentifier[remaining.size()]);
        } else {
          return new InputAttemptIdentifier[] {srcAttemptId};
        }
      }

      LOG.warn("Failed to shuffle output of " + srcAttemptId + " from " + host.getHostName(), ioe);

      // Inform the shuffle-scheduler
      mapOutput.abort();
      metrics.failedFetch();
      return new InputAttemptIdentifier[] {srcAttemptId};
    }
  }
  // Example #5
  // 0
  /**
   * Reads one shuffle header from {@code input} and lets the {@code MapOutput} reserved by the
   * merger pull the corresponding data from the (optionally crypto-wrapped) stream.
   *
   * @param host the host the data is being fetched from
   * @param input stream positioned at the next shuffle header
   * @param remaining map attempts still outstanding for this host; the copied attempt is removed
   *     from this set on success
   * @return {@code null} when the output was copied successfully; {@code EMPTY_ATTEMPT_ID_ARRAY}
   *     when the merger returned no output (wait) or reserving it failed locally; otherwise the
   *     map attempts that are considered bad (all of {@code remaining} when the header could not
   *     be attributed to a single attempt)
   */
  private TaskAttemptID[] copyMapOutput(
      MapHost host, DataInputStream input, Set<TaskAttemptID> remaining) {
    MapOutput<K, V> target = null;
    TaskAttemptID attemptId = null;
    long rawLength = -1;
    long wireLength = -1;

    try {
      final long fetchStart = System.currentTimeMillis();
      int partition = -1;

      // Parse the header that precedes every map output on the wire.
      try {
        final ShuffleHeader hdr = new ShuffleHeader();
        hdr.readFields(input);
        attemptId = TaskAttemptID.forName(hdr.mapId);
        wireLength = hdr.compressedLength;
        rawLength = hdr.uncompressedLength;
        partition = hdr.forReduce;
      } catch (IllegalArgumentException badId) {
        badIdErrs.increment(1);
        LOG.warn("Invalid map id ", badId);
        // The offending attempt cannot be pinned down, so every outstanding one is returned.
        return remaining.toArray(new TaskAttemptID[remaining.size()]);
      }

      // Wrap the stream if required, then discount the crypto padding from both lengths.
      final InputStream plain = input;
      final InputStream payload = CryptoUtils.wrapIfNecessary(jobConf, plain, wireLength);
      wireLength -= CryptoUtils.cryptoPadding(jobConf);
      rawLength -= CryptoUtils.cryptoPadding(jobConf);

      // Basic plausibility checks on the header values.
      if (!verifySanity(wireLength, rawLength, partition, remaining, attemptId)) {
        return new TaskAttemptID[] {attemptId};
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug("header: " + attemptId + ", len: " + wireLength + ", decomp len: " + rawLength);
      }

      // Ask the merger where this output should go (memory or disk).
      try {
        target = merger.reserve(attemptId, rawLength, id);
      } catch (IOException localFailure) {
        // Reported as a local error rather than as a fetch failure.
        ioErrs.increment(1);
        scheduler.reportLocalError(localFailure);
        return EMPTY_ATTEMPT_ID_ARRAY;
      }

      // A null reservation means "not now"; this is not an error.
      if (target == null) {
        LOG.info("fetcher#" + id + " - MergeManager returned status WAIT ...");
        return EMPTY_ATTEMPT_ID_ARRAY;
      }

      // Codecs such as lzo, lz4, snappy and bz2 raise java.lang.InternalError on decompression
      // failures; it is rethrown as IOException so the fetch-failure handling below applies.
      try {
        LOG.info(
            "fetcher#"
                + id
                + " about to shuffle output of map "
                + target.getMapId()
                + " decomp: "
                + rawLength
                + " len: "
                + wireLength
                + " to "
                + target.getDescription());
        target.shuffle(host, payload, wireLength, rawLength, metrics, reporter);
      } catch (java.lang.InternalError codecFailure) {
        LOG.warn("Failed to shuffle for fetcher#" + id, codecFailure);
        throw new IOException(codecFailure);
      }

      // Success: tell the scheduler, drop the attempt from the outstanding set, count it.
      final long elapsed = System.currentTimeMillis() - fetchStart;
      scheduler.copySucceeded(attemptId, host, wireLength, elapsed, target);
      remaining.remove(attemptId);
      metrics.successFetch();
      return null;
    } catch (IOException ioe) {
      ioErrs.increment(1);

      final boolean attemptKnown = attemptId != null;
      if (attemptKnown && target != null) {
        // The copy itself failed after a destination had been reserved.
        LOG.warn("Failed to shuffle output of " + attemptId + " from " + host.getHostName(), ioe);
        target.abort();
        metrics.failedFetch();
        return new TaskAttemptID[] {attemptId};
      }

      // Failure before a destination was reserved: nothing to abort.
      LOG.info(
          "fetcher#"
              + id
              + " failed to read map header"
              + attemptId
              + " decomp: "
              + rawLength
              + ", "
              + wireLength,
          ioe);
      return attemptKnown
          ? new TaskAttemptID[] {attemptId}
          : remaining.toArray(new TaskAttemptID[remaining.size()]);
    }
  }