public void run() { try { while (!stopped && !Thread.currentThread().isInterrupted()) { MapHost host = null; try { // If merge is on, block merger.waitForInMemoryMerge(); // Get a host to shuffle from host = scheduler.getHost(); metrics.threadBusy(); // Shuffle copyFromHost(host); } finally { if (host != null) { scheduler.freeHost(host); metrics.threadFree(); } } } } catch (InterruptedException ie) { return; } catch (Throwable t) { shuffle.reportException(t); } }
private InputAttemptIdentifier[] copyMapOutput( MapHost host, DataInputStream input, Set<InputAttemptIdentifier> remaining) { MapOutput mapOutput = null; InputAttemptIdentifier srcAttemptId = null; long decompressedLength = -1; long compressedLength = -1; try { long startTime = System.currentTimeMillis(); int forReduce = -1; // Read the shuffle header try { ShuffleHeader header = new ShuffleHeader(); header.readFields(input); srcAttemptId = scheduler.getIdentifierForFetchedOutput(header.mapId, header.forReduce); compressedLength = header.compressedLength; decompressedLength = header.uncompressedLength; forReduce = header.forReduce; } catch (IllegalArgumentException e) { badIdErrs.increment(1); LOG.warn("Invalid map id ", e); // Don't know which one was bad, so consider all of them as bad return remaining.toArray(new InputAttemptIdentifier[remaining.size()]); } // Do some basic sanity verification if (!verifySanity(compressedLength, decompressedLength, forReduce, remaining, srcAttemptId)) { return new InputAttemptIdentifier[] {srcAttemptId}; } if (LOG.isDebugEnabled()) { LOG.debug( "header: " + srcAttemptId + ", len: " + compressedLength + ", decomp len: " + decompressedLength); } // Get the location for the map output - either in-memory or on-disk mapOutput = merger.reserve(srcAttemptId, decompressedLength, id); // Check if we can shuffle *now* ... if (mapOutput.getType() == Type.WAIT) { LOG.info("fetcher#" + id + " - MergerManager returned Status.WAIT ..."); // Not an error but wait to process data. return EMPTY_ATTEMPT_ID_ARRAY; } // Go! LOG.info( "fetcher#" + id + " about to shuffle output of map " + mapOutput.getAttemptIdentifier() + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + mapOutput.getType()); if (mapOutput.getType() == Type.MEMORY) { shuffleToMemory(host, mapOutput, input, (int) decompressedLength, (int) compressedLength); } else { shuffleToDisk(host, mapOutput, input, compressedLength); } // Inform the shuffle scheduler long endTime = System.currentTimeMillis(); scheduler.copySucceeded(srcAttemptId, host, compressedLength, endTime - startTime, mapOutput); // Note successful shuffle remaining.remove(srcAttemptId); metrics.successFetch(); return null; } catch (IOException ioe) { ioErrs.increment(1); if (srcAttemptId == null || mapOutput == null) { LOG.info( "fetcher#" + id + " failed to read map header" + srcAttemptId + " decomp: " + decompressedLength + ", " + compressedLength, ioe); if (srcAttemptId == null) { return remaining.toArray(new InputAttemptIdentifier[remaining.size()]); } else { return new InputAttemptIdentifier[] {srcAttemptId}; } } LOG.warn("Failed to shuffle output of " + srcAttemptId + " from " + host.getHostName(), ioe); // Inform the shuffle-scheduler mapOutput.abort(); metrics.failedFetch(); return new InputAttemptIdentifier[] {srcAttemptId}; } }
/** * The crux of the matter... * * @param host {@link MapHost} from which we need to shuffle available map-outputs. */ @VisibleForTesting protected void copyFromHost(MapHost host) throws IOException { // Get completed maps on 'host' List<InputAttemptIdentifier> srcAttempts = scheduler.getMapsForHost(host); // Sanity check to catch hosts with only 'OBSOLETE' maps, // especially at the tail of large jobs if (srcAttempts.size() == 0) { return; } if (LOG.isDebugEnabled()) { LOG.debug("Fetcher " + id + " going to fetch from " + host + " for: " + srcAttempts); } // List of maps to be fetched yet Set<InputAttemptIdentifier> remaining = new HashSet<InputAttemptIdentifier>(srcAttempts); // Construct the url and connect DataInputStream input; boolean connectSucceeded = false; try { URL url = getMapOutputURL(host, srcAttempts); HttpURLConnection connection = openConnection(url); // generate hash of the url String msgToEncode = SecureShuffleUtils.buildMsgFrom(url); String encHash = SecureShuffleUtils.hashFromString(msgToEncode, jobTokenSecret); // put url hash into http header connection.addRequestProperty(SecureShuffleUtils.HTTP_HEADER_URL_HASH, encHash); // set the read timeout connection.setReadTimeout(readTimeout); // put shuffle version into http header connection.addRequestProperty( ShuffleHeader.HTTP_HEADER_NAME, ShuffleHeader.DEFAULT_HTTP_HEADER_NAME); connection.addRequestProperty( ShuffleHeader.HTTP_HEADER_VERSION, ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION); connect(connection, connectionTimeout); connectSucceeded = true; input = new DataInputStream(connection.getInputStream()); // Validate response code int rc = connection.getResponseCode(); if (rc != HttpURLConnection.HTTP_OK) { throw new IOException( "Got invalid response code " + rc + " from " + url + ": " + connection.getResponseMessage()); } // get the shuffle version if (!ShuffleHeader.DEFAULT_HTTP_HEADER_NAME.equals( connection.getHeaderField(ShuffleHeader.HTTP_HEADER_NAME)) || !ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION.equals( connection.getHeaderField(ShuffleHeader.HTTP_HEADER_VERSION))) { throw new IOException("Incompatible shuffle response version"); } // get the replyHash which is HMac of the encHash we sent to the server String replyHash = connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH); if (replyHash == null) { throw new IOException("security validation of TT Map output failed"); } LOG.debug("url=" + msgToEncode + ";encHash=" + encHash + ";replyHash=" + replyHash); // verify that replyHash is HMac of encHash SecureShuffleUtils.verifyReply(replyHash, encHash, jobTokenSecret); LOG.info("for url=" + msgToEncode + " sent hash and receievd reply"); } catch (IOException ie) { ioErrs.increment(1); LOG.warn("Failed to connect to " + host + " with " + remaining.size() + " map outputs", ie); // If connect did not succeed, just mark all the maps as failed, // indirectly penalizing the host if (!connectSucceeded) { for (InputAttemptIdentifier left : remaining) { scheduler.copyFailed(left, host, connectSucceeded); } } else { // If we got a read error at this stage, it implies there was a problem // with the first map, typically lost map. So, penalize only that map // and add the rest InputAttemptIdentifier firstMap = srcAttempts.get(0); scheduler.copyFailed(firstMap, host, connectSucceeded); } // Add back all the remaining maps, WITHOUT marking them as failed for (InputAttemptIdentifier left : remaining) { // TODO Should the first one be skipped ? scheduler.putBackKnownMapOutput(host, left); } return; } try { // Loop through available map-outputs and fetch them // On any error, faildTasks is not null and we exit // after putting back the remaining maps to the // yet_to_be_fetched list and marking the failed tasks. InputAttemptIdentifier[] failedTasks = null; while (!remaining.isEmpty() && failedTasks == null) { failedTasks = copyMapOutput(host, input, remaining); } if (failedTasks != null && failedTasks.length > 0) { LOG.warn("copyMapOutput failed for tasks " + Arrays.toString(failedTasks)); for (InputAttemptIdentifier left : failedTasks) { scheduler.copyFailed(left, host, true); } } IOUtils.cleanup(LOG, input); // Sanity check if (failedTasks == null && !remaining.isEmpty()) { throw new IOException( "server didn't return all expected map outputs: " + remaining.size() + " left."); } } finally { for (InputAttemptIdentifier left : remaining) { scheduler.putBackKnownMapOutput(host, left); } } }