@Override public void fetchFailed( String host, InputAttemptIdentifier srcAttemptIdentifier, boolean connectFailed) { // TODO NEWTEZ. Implement logic to report fetch failures after a threshold. // For now, reporting immediately. LOG.info( "Fetch failed for src: " + srcAttemptIdentifier + "InputIdentifier: " + srcAttemptIdentifier + ", connectFailed: " + connectFailed); failedShufflesCounter.increment(1); if (srcAttemptIdentifier == null) { String message = "Received fetchFailure for an unknown src (null)"; LOG.fatal(message); inputContext.fatalError(null, message); } else { InputReadErrorEvent readError = new InputReadErrorEvent( "Fetch failure while fetching from " + TezRuntimeUtils.getTaskAttemptIdentifier( inputContext.getSourceVertexName(), srcAttemptIdentifier.getInputIdentifier().getInputIndex(), srcAttemptIdentifier.getAttemptNumber()), srcAttemptIdentifier.getInputIdentifier().getInputIndex(), srcAttemptIdentifier.getAttemptNumber()); List<Event> failedEvents = Lists.newArrayListWithCapacity(1); failedEvents.add(readError); inputContext.sendEvents(failedEvents); } }
public void addKnownInput( String hostName, int port, InputAttemptIdentifier srcAttemptIdentifier, int srcPhysicalIndex) { String identifier = InputHost.createIdentifier(hostName, port); InputHost host = knownSrcHosts.get(identifier); if (host == null) { host = new InputHost(hostName, port, inputContext.getApplicationId(), srcPhysicalIndex); assert identifier.equals(host.getIdentifier()); InputHost old = knownSrcHosts.putIfAbsent(identifier, host); if (old != null) { host = old; } } if (LOG.isDebugEnabled()) { LOG.debug("Adding input: " + srcAttemptIdentifier + ", to host: " + host); } host.addKnownInput(srcAttemptIdentifier); lock.lock(); try { boolean added = pendingHosts.offer(host); if (!added) { String errorMessage = "Unable to add host: " + host.getIdentifier() + " to pending queue"; LOG.error(errorMessage); throw new TezUncheckedException(errorMessage); } wakeLoop.signal(); } finally { lock.unlock(); } }
private void registerCompletedInput(FetchedInput fetchedInput) { lock.lock(); try { completedInputSet.add(fetchedInput.getInputAttemptIdentifier().getInputIdentifier()); completedInputs.add(fetchedInput); if (!inputReadyNotificationSent.getAndSet(true)) { // TODO Should eventually be controlled by Inputs which are processing the data. inputContext.inputIsReady(); } int numComplete = numCompletedInputs.incrementAndGet(); if (numComplete == numInputs) { LOG.info("All inputs fetched for input vertex : " + inputContext.getSourceVertexName()); } } finally { lock.unlock(); } }
private Fetcher constructFetcherForHost(InputHost inputHost) { FetcherBuilder fetcherBuilder = new FetcherBuilder( ShuffleManager.this, httpConnectionParams, inputManager, inputContext.getApplicationId(), shuffleSecret, srcNameTrimmed); if (codec != null) { fetcherBuilder.setCompressionParameters(codec); } fetcherBuilder.setIFileParams(ifileReadAhead, ifileReadAheadLength); // Remove obsolete inputs from the list being given to the fetcher. Also // remove from the obsolete list. List<InputAttemptIdentifier> pendingInputsForHost = inputHost.clearAndGetPendingInputs(); for (Iterator<InputAttemptIdentifier> inputIter = pendingInputsForHost.iterator(); inputIter.hasNext(); ) { InputAttemptIdentifier input = inputIter.next(); // Avoid adding attempts which have already completed. if (completedInputSet.contains(input.getInputIdentifier())) { inputIter.remove(); continue; } // Avoid adding attempts which have been marked as OBSOLETE if (obsoletedInputs.contains(input)) { inputIter.remove(); } } // TODO NEWTEZ Maybe limit the number of inputs being given to a single // fetcher, especially in the case where #hosts < #fetchers fetcherBuilder.assignWork( inputHost.getHost(), inputHost.getPort(), inputHost.getSrcPhysicalIndex(), pendingInputsForHost); LOG.info( "Created Fetcher for host: " + inputHost.getHost() + ", with inputs: " + pendingInputsForHost); return fetcherBuilder.build(); }
public ShuffleManager( TezInputContext inputContext, Configuration conf, int numInputs, int bufferSize, boolean ifileReadAheadEnabled, int ifileReadAheadLength, CompressionCodec codec, FetchedInputAllocator inputAllocator) throws IOException { this.inputContext = inputContext; this.numInputs = numInputs; this.shuffledInputsCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS); this.failedShufflesCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_FAILED_SHUFFLE_INPUTS); this.bytesShuffledCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES); this.decompressedDataSizeCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_DECOMPRESSED); this.bytesShuffledToDiskCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_DISK); this.bytesShuffledToMemCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_MEM); this.ifileBufferSize = bufferSize; this.ifileReadAhead = ifileReadAheadEnabled; this.ifileReadAheadLength = ifileReadAheadLength; this.codec = codec; this.inputManager = inputAllocator; this.srcNameTrimmed = TezUtils.cleanVertexName(inputContext.getSourceVertexName()); completedInputSet = Collections.newSetFromMap(new ConcurrentHashMap<InputIdentifier, Boolean>(numInputs)); completedInputs = new LinkedBlockingQueue<FetchedInput>(numInputs); knownSrcHosts = new ConcurrentHashMap<String, InputHost>(); pendingHosts = new LinkedBlockingQueue<InputHost>(); obsoletedInputs = Collections.newSetFromMap(new ConcurrentHashMap<InputAttemptIdentifier, Boolean>()); runningFetchers = Collections.newSetFromMap(new ConcurrentHashMap<Fetcher, Boolean>()); int maxConfiguredFetchers = conf.getInt( TezJobConfig.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES, TezJobConfig.TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES_DEFAULT); this.numFetchers = Math.min(maxConfiguredFetchers, numInputs); ExecutorService fetcherRawExecutor = Executors.newFixedThreadPool( numFetchers, new ThreadFactoryBuilder() .setDaemon(true) .setNameFormat("Fetcher [" + srcNameTrimmed + "] #%d") .build()); this.fetcherExecutor = MoreExecutors.listeningDecorator(fetcherRawExecutor); ExecutorService schedulerRawExecutor = Executors.newFixedThreadPool( 1, new ThreadFactoryBuilder() .setDaemon(true) .setNameFormat("ShuffleRunner [" + srcNameTrimmed + "]") .build()); this.schedulerExecutor = MoreExecutors.listeningDecorator(schedulerRawExecutor); this.startTime = System.currentTimeMillis(); this.lastProgressTime = startTime; this.shuffleSecret = ShuffleUtils.getJobTokenSecretFromTokenBytes( inputContext.getServiceConsumerMetaData( TezConfiguration.TEZ_SHUFFLE_HANDLER_SERVICE_ID)); httpConnectionParams = ShuffleUtils.constructHttpShuffleConnectionParams(conf); LOG.info( this.getClass().getSimpleName() + " : numInputs=" + numInputs + ", compressionCodec=" + (codec == null ? "NoCompressionCodec" : codec.getClass().getName()) + ", numFetchers=" + numFetchers + ", ifileBufferSize=" + ifileBufferSize + ", ifileReadAheadEnabled=" + ifileReadAhead + ", ifileReadAheadLength=" + ifileReadAheadLength + ", " + httpConnectionParams.toString()); }
public Fetcher( Configuration job, ShuffleScheduler scheduler, MergeManager merger, ShuffleClientMetrics metrics, Shuffle shuffle, SecretKey jobTokenSecret, TezInputContext inputContext) throws IOException { this.job = job; this.scheduler = scheduler; this.merger = merger; this.metrics = metrics; this.shuffle = shuffle; this.id = ++nextId; this.jobTokenSecret = jobTokenSecret; ioErrs = inputContext .getCounters() .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.IO_ERROR.toString()); wrongLengthErrs = inputContext .getCounters() .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_LENGTH.toString()); badIdErrs = inputContext .getCounters() .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.BAD_ID.toString()); wrongMapErrs = inputContext .getCounters() .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_MAP.toString()); connectionErrs = inputContext .getCounters() .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.CONNECTION.toString()); wrongReduceErrs = inputContext .getCounters() .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_REDUCE.toString()); if (ConfigUtils.isIntermediateInputCompressed(job)) { Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(job, DefaultCodec.class); codec = ReflectionUtils.newInstance(codecClass, job); decompressor = CodecPool.getDecompressor(codec); } else { codec = null; decompressor = null; } this.connectionTimeout = job.getInt( TezJobConfig.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_STALLED_COPY_TIMEOUT); this.readTimeout = job.getInt( TezJobConfig.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT); setName("fetcher#" + id); setDaemon(true); synchronized (Fetcher.class) { sslShuffle = job.getBoolean( TezJobConfig.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL, TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_ENABLE_SSL); if (sslShuffle && sslFactory == null) { sslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, job); try { sslFactory.init(); } catch (Exception ex) { sslFactory.destroy(); throw new RuntimeException(ex); } } } }