private InputAttemptIdentifier[] copyMapOutput( MapHost host, DataInputStream input, Set<InputAttemptIdentifier> remaining) { MapOutput mapOutput = null; InputAttemptIdentifier srcAttemptId = null; long decompressedLength = -1; long compressedLength = -1; try { long startTime = System.currentTimeMillis(); int forReduce = -1; // Read the shuffle header try { ShuffleHeader header = new ShuffleHeader(); header.readFields(input); srcAttemptId = scheduler.getIdentifierForFetchedOutput(header.mapId, header.forReduce); compressedLength = header.compressedLength; decompressedLength = header.uncompressedLength; forReduce = header.forReduce; } catch (IllegalArgumentException e) { badIdErrs.increment(1); LOG.warn("Invalid map id ", e); // Don't know which one was bad, so consider all of them as bad return remaining.toArray(new InputAttemptIdentifier[remaining.size()]); } // Do some basic sanity verification if (!verifySanity(compressedLength, decompressedLength, forReduce, remaining, srcAttemptId)) { return new InputAttemptIdentifier[] {srcAttemptId}; } if (LOG.isDebugEnabled()) { LOG.debug( "header: " + srcAttemptId + ", len: " + compressedLength + ", decomp len: " + decompressedLength); } // Get the location for the map output - either in-memory or on-disk mapOutput = merger.reserve(srcAttemptId, decompressedLength, id); // Check if we can shuffle *now* ... if (mapOutput.getType() == Type.WAIT) { LOG.info("fetcher#" + id + " - MergerManager returned Status.WAIT ..."); // Not an error but wait to process data. return EMPTY_ATTEMPT_ID_ARRAY; } // Go! LOG.info( "fetcher#" + id + " about to shuffle output of map " + mapOutput.getAttemptIdentifier() + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + mapOutput.getType()); if (mapOutput.getType() == Type.MEMORY) { shuffleToMemory(host, mapOutput, input, (int) decompressedLength, (int) compressedLength); } else { shuffleToDisk(host, mapOutput, input, compressedLength); } // Inform the shuffle scheduler long endTime = System.currentTimeMillis(); scheduler.copySucceeded(srcAttemptId, host, compressedLength, endTime - startTime, mapOutput); // Note successful shuffle remaining.remove(srcAttemptId); metrics.successFetch(); return null; } catch (IOException ioe) { ioErrs.increment(1); if (srcAttemptId == null || mapOutput == null) { LOG.info( "fetcher#" + id + " failed to read map header" + srcAttemptId + " decomp: " + decompressedLength + ", " + compressedLength, ioe); if (srcAttemptId == null) { return remaining.toArray(new InputAttemptIdentifier[remaining.size()]); } else { return new InputAttemptIdentifier[] {srcAttemptId}; } } LOG.warn("Failed to shuffle output of " + srcAttemptId + " from " + host.getHostName(), ioe); // Inform the shuffle-scheduler mapOutput.abort(); metrics.failedFetch(); return new InputAttemptIdentifier[] {srcAttemptId}; } }
private TaskAttemptID[] copyMapOutput( MapHost host, DataInputStream input, Set<TaskAttemptID> remaining) { MapOutput<K, V> mapOutput = null; TaskAttemptID mapId = null; long decompressedLength = -1; long compressedLength = -1; try { long startTime = System.currentTimeMillis(); int forReduce = -1; // Read the shuffle header try { ShuffleHeader header = new ShuffleHeader(); header.readFields(input); mapId = TaskAttemptID.forName(header.mapId); compressedLength = header.compressedLength; decompressedLength = header.uncompressedLength; forReduce = header.forReduce; } catch (IllegalArgumentException e) { badIdErrs.increment(1); LOG.warn("Invalid map id ", e); // Don't know which one was bad, so consider all of them as bad return remaining.toArray(new TaskAttemptID[remaining.size()]); } InputStream is = input; is = CryptoUtils.wrapIfNecessary(jobConf, is, compressedLength); compressedLength -= CryptoUtils.cryptoPadding(jobConf); decompressedLength -= CryptoUtils.cryptoPadding(jobConf); // Do some basic sanity verification if (!verifySanity(compressedLength, decompressedLength, forReduce, remaining, mapId)) { return new TaskAttemptID[] {mapId}; } if (LOG.isDebugEnabled()) { LOG.debug( "header: " + mapId + ", len: " + compressedLength + ", decomp len: " + decompressedLength); } // Get the location for the map output - either in-memory or on-disk try { mapOutput = merger.reserve(mapId, decompressedLength, id); } catch (IOException ioe) { // kill this reduce attempt ioErrs.increment(1); scheduler.reportLocalError(ioe); return EMPTY_ATTEMPT_ID_ARRAY; } // Check if we can shuffle *now* ... if (mapOutput == null) { LOG.info("fetcher#" + id + " - MergeManager returned status WAIT ..."); // Not an error but wait to process data. return EMPTY_ATTEMPT_ID_ARRAY; } // The codec for lz0,lz4,snappy,bz2,etc. throw java.lang.InternalError // on decompression failures. Catching and re-throwing as IOException // to allow fetch failure logic to be processed try { // Go! LOG.info( "fetcher#" + id + " about to shuffle output of map " + mapOutput.getMapId() + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + mapOutput.getDescription()); mapOutput.shuffle(host, is, compressedLength, decompressedLength, metrics, reporter); } catch (java.lang.InternalError e) { LOG.warn("Failed to shuffle for fetcher#" + id, e); throw new IOException(e); } // Inform the shuffle scheduler long endTime = System.currentTimeMillis(); scheduler.copySucceeded(mapId, host, compressedLength, endTime - startTime, mapOutput); // Note successful shuffle remaining.remove(mapId); metrics.successFetch(); return null; } catch (IOException ioe) { ioErrs.increment(1); if (mapId == null || mapOutput == null) { LOG.info( "fetcher#" + id + " failed to read map header" + mapId + " decomp: " + decompressedLength + ", " + compressedLength, ioe); if (mapId == null) { return remaining.toArray(new TaskAttemptID[remaining.size()]); } else { return new TaskAttemptID[] {mapId}; } } LOG.warn("Failed to shuffle output of " + mapId + " from " + host.getHostName(), ioe); // Inform the shuffle-scheduler mapOutput.abort(); metrics.failedFetch(); return new TaskAttemptID[] {mapId}; } }