public void run() { try { while (!stopped && !Thread.currentThread().isInterrupted()) { MapHost host = null; try { // If merge is on, block merger.waitForInMemoryMerge(); // Get a host to shuffle from host = scheduler.getHost(); metrics.threadBusy(); // Shuffle copyFromHost(host); } finally { if (host != null) { scheduler.freeHost(host); metrics.threadFree(); } } } } catch (InterruptedException ie) { return; } catch (Throwable t) { shuffle.reportException(t); } }
private void shuffleToDisk( MapHost host, MapOutput mapOutput, InputStream input, long compressedLength) throws IOException { // Copy data to local-disk OutputStream output = mapOutput.getDisk(); long bytesLeft = compressedLength; try { final int BYTES_TO_READ = 64 * 1024; byte[] buf = new byte[BYTES_TO_READ]; while (bytesLeft > 0) { int n = input.read(buf, 0, (int) Math.min(bytesLeft, BYTES_TO_READ)); if (n < 0) { throw new IOException( "read past end of stream reading " + mapOutput.getAttemptIdentifier()); } output.write(buf, 0, n); bytesLeft -= n; metrics.inputBytes(n); } LOG.info( "Read " + (compressedLength - bytesLeft) + " bytes from map-output for " + mapOutput.getAttemptIdentifier()); output.close(); } catch (IOException ioe) { // Close the streams IOUtils.cleanup(LOG, input, output); // Re-throw throw ioe; } // Sanity check if (bytesLeft != 0) { throw new IOException( "Incomplete map output received for " + mapOutput.getAttemptIdentifier() + " from " + host.getHostName() + " (" + bytesLeft + " bytes missing of " + compressedLength + ")"); } }
private void shuffleToMemory( MapHost host, MapOutput mapOutput, InputStream input, int decompressedLength, int compressedLength) throws IOException { IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, job); input = checksumIn; // Are map-outputs compressed? if (codec != null) { decompressor.reset(); input = codec.createInputStream(input, decompressor); } // Copy map-output into an in-memory buffer byte[] shuffleData = mapOutput.getMemory(); try { IOUtils.readFully(input, shuffleData, 0, shuffleData.length); metrics.inputBytes(shuffleData.length); LOG.info( "Read " + shuffleData.length + " bytes from map-output for " + mapOutput.getAttemptIdentifier()); } catch (IOException ioe) { // Close the streams IOUtils.cleanup(LOG, input); // Re-throw throw ioe; } }
/**
 * Copies a single map output from {@code host} off the already-open
 * {@code input} stream.
 *
 * <p>Return value encodes the outcome for the caller's retry logic:
 * {@code null} on success, {@code EMPTY_ATTEMPT_ID_ARRAY} when the merger asks
 * us to WAIT (not an error), a one-element array naming the failed attempt, or
 * the whole {@code remaining} set when the failure happened before the shuffle
 * header identified an attempt (we can't tell which one was bad).
 *
 * @param host host being fetched from
 * @param input stream positioned at the next shuffle header
 * @param remaining attempts still expected from this host; the successfully
 *     copied attempt is removed from it as a side effect
 */
private InputAttemptIdentifier[] copyMapOutput(MapHost host, DataInputStream input,
    Set<InputAttemptIdentifier> remaining) {
  MapOutput mapOutput = null;
  InputAttemptIdentifier srcAttemptId = null;
  long decompressedLength = -1;
  long compressedLength = -1;
  try {
    long startTime = System.currentTimeMillis();
    int forReduce = -1;
    // Read the shuffle header
    try {
      ShuffleHeader header = new ShuffleHeader();
      header.readFields(input);
      srcAttemptId = scheduler.getIdentifierForFetchedOutput(header.mapId, header.forReduce);
      compressedLength = header.compressedLength;
      decompressedLength = header.uncompressedLength;
      forReduce = header.forReduce;
    } catch (IllegalArgumentException e) {
      badIdErrs.increment(1);
      LOG.warn("Invalid map id ", e);
      // Don't know which one was bad, so consider all of them as bad
      return remaining.toArray(new InputAttemptIdentifier[remaining.size()]);
    }

    // Do some basic sanity verification
    if (!verifySanity(compressedLength, decompressedLength, forReduce, remaining, srcAttemptId)) {
      // Sanity failure is attributed to just this attempt.
      return new InputAttemptIdentifier[] {srcAttemptId};
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("header: " + srcAttemptId + ", len: " + compressedLength
          + ", decomp len: " + decompressedLength);
    }

    // Get the location for the map output - either in-memory or on-disk
    mapOutput = merger.reserve(srcAttemptId, decompressedLength, id);

    // Check if we can shuffle *now* ...
    if (mapOutput.getType() == Type.WAIT) {
      LOG.info("fetcher#" + id + " - MergerManager returned Status.WAIT ...");
      // Not an error but wait to process data.
      return EMPTY_ATTEMPT_ID_ARRAY;
    }

    // Go!
    LOG.info("fetcher#" + id + " about to shuffle output of map "
        + mapOutput.getAttemptIdentifier() + " decomp: " + decompressedLength
        + " len: " + compressedLength + " to " + mapOutput.getType());
    // Route to memory or disk based on the reservation the merger made.
    if (mapOutput.getType() == Type.MEMORY) {
      shuffleToMemory(host, mapOutput, input, (int) decompressedLength, (int) compressedLength);
    } else {
      shuffleToDisk(host, mapOutput, input, compressedLength);
    }

    // Inform the shuffle scheduler
    long endTime = System.currentTimeMillis();
    scheduler.copySucceeded(srcAttemptId, host, compressedLength, endTime - startTime, mapOutput);
    // Note successful shuffle
    remaining.remove(srcAttemptId);
    metrics.successFetch();
    return null;
  } catch (IOException ioe) {
    ioErrs.increment(1);
    if (srcAttemptId == null || mapOutput == null) {
      // Failed before (or while) reserving space: either the header itself was
      // unreadable (srcAttemptId == null -> blame everything remaining) or we
      // at least know which attempt it was.
      LOG.info("fetcher#" + id + " failed to read map header" + srcAttemptId
          + " decomp: " + decompressedLength + ", " + compressedLength, ioe);
      if (srcAttemptId == null) {
        return remaining.toArray(new InputAttemptIdentifier[remaining.size()]);
      } else {
        return new InputAttemptIdentifier[] {srcAttemptId};
      }
    }

    LOG.warn("Failed to shuffle output of " + srcAttemptId + " from " + host.getHostName(), ioe);

    // Inform the shuffle-scheduler
    mapOutput.abort();
    metrics.failedFetch();
    return new InputAttemptIdentifier[] {srcAttemptId};
  }
}
/**
 * Copies a single map output from {@code host} off the already-open
 * {@code input} stream (TaskAttemptID variant).
 *
 * <p>Return value encodes the outcome for the caller's retry logic:
 * {@code null} on success, {@code EMPTY_ATTEMPT_ID_ARRAY} when the merger asks
 * us to WAIT or a local disk error was reported (not this host's fault), a
 * one-element array naming the failed map, or the whole {@code remaining} set
 * when the failure happened before the header identified a map.
 *
 * @param host host being fetched from
 * @param input stream positioned at the next shuffle header
 * @param remaining maps still expected from this host; the successfully
 *     copied map is removed from it as a side effect
 */
private TaskAttemptID[] copyMapOutput(MapHost host, DataInputStream input,
    Set<TaskAttemptID> remaining) {
  MapOutput<K, V> mapOutput = null;
  TaskAttemptID mapId = null;
  long decompressedLength = -1;
  long compressedLength = -1;
  try {
    long startTime = System.currentTimeMillis();
    int forReduce = -1;
    // Read the shuffle header
    try {
      ShuffleHeader header = new ShuffleHeader();
      header.readFields(input);
      mapId = TaskAttemptID.forName(header.mapId);
      compressedLength = header.compressedLength;
      decompressedLength = header.uncompressedLength;
      forReduce = header.forReduce;
    } catch (IllegalArgumentException e) {
      badIdErrs.increment(1);
      LOG.warn("Invalid map id ", e);
      // Don't know which one was bad, so consider all of them as bad
      return remaining.toArray(new TaskAttemptID[remaining.size()]);
    }

    InputStream is = input;
    // Unwrap encrypted-shuffle framing if configured; the crypto padding is
    // wire overhead, so subtract it from both lengths before using them.
    is = CryptoUtils.wrapIfNecessary(jobConf, is, compressedLength);
    compressedLength -= CryptoUtils.cryptoPadding(jobConf);
    decompressedLength -= CryptoUtils.cryptoPadding(jobConf);

    // Do some basic sanity verification
    if (!verifySanity(compressedLength, decompressedLength, forReduce, remaining, mapId)) {
      // Sanity failure is attributed to just this map.
      return new TaskAttemptID[] {mapId};
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("header: " + mapId + ", len: " + compressedLength
          + ", decomp len: " + decompressedLength);
    }

    // Get the location for the map output - either in-memory or on-disk
    try {
      mapOutput = merger.reserve(mapId, decompressedLength, id);
    } catch (IOException ioe) {
      // kill this reduce attempt
      ioErrs.increment(1);
      scheduler.reportLocalError(ioe);
      // Local problem, not the host's fault — report nothing against it.
      return EMPTY_ATTEMPT_ID_ARRAY;
    }

    // Check if we can shuffle *now* ...
    if (mapOutput == null) {
      LOG.info("fetcher#" + id + " - MergeManager returned status WAIT ...");
      // Not an error but wait to process data.
      return EMPTY_ATTEMPT_ID_ARRAY;
    }

    // The codec for lz0,lz4,snappy,bz2,etc. throw java.lang.InternalError
    // on decompression failures. Catching and re-throwing as IOException
    // to allow fetch failure logic to be processed
    try {
      // Go!
      LOG.info("fetcher#" + id + " about to shuffle output of map "
          + mapOutput.getMapId() + " decomp: " + decompressedLength
          + " len: " + compressedLength + " to " + mapOutput.getDescription());
      mapOutput.shuffle(host, is, compressedLength, decompressedLength, metrics, reporter);
    } catch (java.lang.InternalError e) {
      LOG.warn("Failed to shuffle for fetcher#" + id, e);
      throw new IOException(e);
    }

    // Inform the shuffle scheduler
    long endTime = System.currentTimeMillis();
    scheduler.copySucceeded(mapId, host, compressedLength, endTime - startTime, mapOutput);
    // Note successful shuffle
    remaining.remove(mapId);
    metrics.successFetch();
    return null;
  } catch (IOException ioe) {
    ioErrs.increment(1);
    if (mapId == null || mapOutput == null) {
      // Failed before (or while) reserving space: either the header itself was
      // unreadable (mapId == null -> blame everything remaining) or we at
      // least know which map it was.
      LOG.info("fetcher#" + id + " failed to read map header" + mapId
          + " decomp: " + decompressedLength + ", " + compressedLength, ioe);
      if (mapId == null) {
        return remaining.toArray(new TaskAttemptID[remaining.size()]);
      } else {
        return new TaskAttemptID[] {mapId};
      }
    }

    LOG.warn("Failed to shuffle output of " + mapId + " from " + host.getHostName(), ioe);

    // Inform the shuffle-scheduler
    mapOutput.abort();
    metrics.failedFetch();
    return new TaskAttemptID[] {mapId};
  }
}