@Override public void fetchFailed( String host, InputAttemptIdentifier srcAttemptIdentifier, boolean connectFailed) { // TODO NEWTEZ. Implement logic to report fetch failures after a threshold. // For now, reporting immediately. LOG.info( "Fetch failed for src: " + srcAttemptIdentifier + "InputIdentifier: " + srcAttemptIdentifier + ", connectFailed: " + connectFailed); failedShufflesCounter.increment(1); if (srcAttemptIdentifier == null) { String message = "Received fetchFailure for an unknown src (null)"; LOG.fatal(message); inputContext.fatalError(null, message); } else { InputReadErrorEvent readError = new InputReadErrorEvent( "Fetch failure while fetching from " + TezRuntimeUtils.getTaskAttemptIdentifier( inputContext.getSourceVertexName(), srcAttemptIdentifier.getInputIdentifier().getInputIndex(), srcAttemptIdentifier.getAttemptNumber()), srcAttemptIdentifier.getInputIdentifier().getInputIndex(), srcAttemptIdentifier.getAttemptNumber()); List<Event> failedEvents = Lists.newArrayListWithCapacity(1); failedEvents.add(readError); inputContext.sendEvents(failedEvents); } }
@Override public void fetchSucceeded( String host, InputAttemptIdentifier srcAttemptIdentifier, FetchedInput fetchedInput, long fetchedBytes, long decompressedLength, long copyDuration) throws IOException { InputIdentifier inputIdentifier = srcAttemptIdentifier.getInputIdentifier(); LOG.info( "Completed fetch for attempt: " + srcAttemptIdentifier + " to " + fetchedInput.getType()); // Count irrespective of whether this is a copy of an already fetched input lock.lock(); try { lastProgressTime = System.currentTimeMillis(); } finally { lock.unlock(); } boolean committed = false; if (!completedInputSet.contains(inputIdentifier)) { synchronized (completedInputSet) { if (!completedInputSet.contains(inputIdentifier)) { fetchedInput.commit(); committed = true; // Processing counters for completed and commit fetches only. Need // additional counters for excessive fetches - which primarily comes // in after speculation or retries. shuffledInputsCounter.increment(1); bytesShuffledCounter.increment(fetchedBytes); if (fetchedInput.getType() == Type.MEMORY) { bytesShuffledToMemCounter.increment(fetchedBytes); } else { bytesShuffledToDiskCounter.increment(fetchedBytes); } decompressedDataSizeCounter.increment(decompressedLength); registerCompletedInput(fetchedInput); } } } if (!committed) { fetchedInput.abort(); // If this fails, the fetcher may attempt another abort. } else { lock.lock(); try { // Signal the wakeLoop to check for termination. wakeLoop.signal(); } finally { lock.unlock(); } } // TODO NEWTEZ Maybe inform fetchers, in case they have an alternate attempt of the same task in // their queue. }
private Fetcher constructFetcherForHost(InputHost inputHost) { FetcherBuilder fetcherBuilder = new FetcherBuilder( ShuffleManager.this, httpConnectionParams, inputManager, inputContext.getApplicationId(), shuffleSecret, srcNameTrimmed); if (codec != null) { fetcherBuilder.setCompressionParameters(codec); } fetcherBuilder.setIFileParams(ifileReadAhead, ifileReadAheadLength); // Remove obsolete inputs from the list being given to the fetcher. Also // remove from the obsolete list. List<InputAttemptIdentifier> pendingInputsForHost = inputHost.clearAndGetPendingInputs(); for (Iterator<InputAttemptIdentifier> inputIter = pendingInputsForHost.iterator(); inputIter.hasNext(); ) { InputAttemptIdentifier input = inputIter.next(); // Avoid adding attempts which have already completed. if (completedInputSet.contains(input.getInputIdentifier())) { inputIter.remove(); continue; } // Avoid adding attempts which have been marked as OBSOLETE if (obsoletedInputs.contains(input)) { inputIter.remove(); } } // TODO NEWTEZ Maybe limit the number of inputs being given to a single // fetcher, especially in the case where #hosts < #fetchers fetcherBuilder.assignWork( inputHost.getHost(), inputHost.getPort(), inputHost.getSrcPhysicalIndex(), pendingInputsForHost); LOG.info( "Created Fetcher for host: " + inputHost.getHost() + ", with inputs: " + pendingInputsForHost); return fetcherBuilder.build(); }
/** * Create the map-output-url. This will contain all the map ids separated by commas * * @param host * @param maps * @return * @throws MalformedURLException */ private URL getMapOutputURL(MapHost host, List<InputAttemptIdentifier> srcAttempts) throws MalformedURLException { // Get the base url StringBuffer url = new StringBuffer(host.getBaseUrl()); boolean first = true; for (InputAttemptIdentifier mapId : srcAttempts) { if (!first) { url.append(","); } url.append(mapId.getPathComponent()); first = false; } if (LOG.isDebugEnabled()) { LOG.debug("MapOutput URL for " + host + " -> " + url.toString()); } return new URL(url.toString()); }
public void addCompletedInputWithNoData(InputAttemptIdentifier srcAttemptIdentifier) { InputIdentifier inputIdentifier = srcAttemptIdentifier.getInputIdentifier(); LOG.info("No input data exists for SrcTask: " + inputIdentifier + ". Marking as complete."); if (!completedInputSet.contains(inputIdentifier)) { synchronized (completedInputSet) { if (!completedInputSet.contains(inputIdentifier)) { registerCompletedInput(new NullFetchedInput(srcAttemptIdentifier)); } } } // Awake the loop to check for termination. lock.lock(); try { wakeLoop.signal(); } finally { lock.unlock(); } }
public void addCompletedInputWithData( InputAttemptIdentifier srcAttemptIdentifier, FetchedInput fetchedInput) throws IOException { InputIdentifier inputIdentifier = srcAttemptIdentifier.getInputIdentifier(); LOG.info("Received Data via Event: " + srcAttemptIdentifier + " to " + fetchedInput.getType()); // Count irrespective of whether this is a copy of an already fetched input lock.lock(); try { lastProgressTime = System.currentTimeMillis(); } finally { lock.unlock(); } boolean committed = false; if (!completedInputSet.contains(inputIdentifier)) { synchronized (completedInputSet) { if (!completedInputSet.contains(inputIdentifier)) { fetchedInput.commit(); committed = true; registerCompletedInput(fetchedInput); } } } if (!committed) { fetchedInput.abort(); // If this fails, the fetcher may attempt another // abort. } else { lock.lock(); try { // Signal the wakeLoop to check for termination. wakeLoop.signal(); } finally { lock.unlock(); } } }