/**
 * Reads a compressed file from the local file system and asserts that it
 * decompresses to the expected number of lines.
 *
 * <p>The codec is chosen from the file name via {@link CompressionCodecFactory}.
 * The decompressor borrowed from {@link CodecPool} is always handed back, and
 * the input stream is always closed, even when reading or the assertion fails.
 *
 * @param f path of the compressed file to check
 * @param expectedNumLines number of text lines the decompressed content must contain
 * @throws IOException if the file cannot be opened or read, or if no codec
 *     matches the file name
 */
private void verifyCompressedFile(Path f, int expectedNumLines) throws IOException {
  Configuration conf = new Configuration();
  conf.set("fs.default.name", "file:///");
  FileSystem fs = FileSystem.get(conf);

  CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
  CompressionCodec codec = ccf.getCodec(f);
  LOG.info("gzip check codec is " + codec);
  if (codec == null) {
    // Fail with a descriptive error instead of a NullPointerException further down.
    throw new IOException("No compression codec found for " + f);
  }

  int numLines = 0;
  Decompressor decompressor = null;
  InputStream is = fs.open(f);
  try {
    decompressor = CodecPool.getDecompressor(codec);
    if (null == decompressor) {
      LOG.info("Verifying gzip sanity with null decompressor");
    } else {
      LOG.info("Verifying gzip sanity with decompressor: " + decompressor.toString());
    }

    is = codec.createInputStream(is, decompressor);
    BufferedReader r = new BufferedReader(new InputStreamReader(is));
    while (r.readLine() != null) {
      numLines++;
    }
  } finally {
    try {
      // Closes the decompressing stream if it was created, else the raw file stream.
      is.close();
    } finally {
      if (decompressor != null) {
        CodecPool.returnDecompressor(decompressor);
      }
    }
  }

  assertEquals("Did not read back correct number of lines", expectedNumLines, numLines);
  LOG.info("gzip sanity check returned " + numLines + " lines; ok.");
}
/**
 * Gives a decompressor back to the {@link CodecPool}.
 *
 * <p>When the decompressor's class carries the {@link DoNotPool} annotation,
 * {@code end()} is additionally invoked on it after the hand-back.
 *
 * @param decompressor the decompressor to return; {@code null} is ignored
 */
public void returnDecompressor(Decompressor decompressor) {
  if (decompressor == null) {
    return;
  }

  CodecPool.returnDecompressor(decompressor);

  final boolean markedDoNotPool =
      decompressor.getClass().isAnnotationPresent(DoNotPool.class);
  if (markedDoNotPool) {
    decompressor.end();
  }
}
/**
 * Gives a decompressor back to the {@link CodecPool}, tracing each step when
 * trace logging is enabled.
 *
 * <p>When the decompressor's class carries the {@link DoNotPool} annotation,
 * {@code end()} is additionally invoked on it after the hand-back.
 *
 * @param decompressor the decompressor to return; {@code null} is ignored
 */
public void returnDecompressor(Decompressor decompressor) {
  if (decompressor == null) {
    return;
  }

  if (LOG.isTraceEnabled()) {
    LOG.trace("Returning decompressor " + decompressor + " to pool.");
  }
  CodecPool.returnDecompressor(decompressor);

  final boolean markedDoNotPool =
      decompressor.getClass().isAnnotationPresent(DoNotPool.class);
  if (!markedDoNotPool) {
    return;
  }

  if (LOG.isTraceEnabled()) {
    LOG.trace("Ending decompressor " + decompressor);
  }
  decompressor.end();
}
/**
 * Closes the underlying reader, if any, and hands the decompressor back to
 * the {@link CodecPool}.
 *
 * <p>The decompressor is returned even when closing the reader throws, and
 * the field is cleared afterwards so a repeated call does not return it twice.
 *
 * @throws IOException if closing the underlying reader fails
 */
public synchronized void close() throws IOException {
  try {
    if (in == null) {
      // Nothing to close; the finally clause below still runs.
      return;
    }
    in.close();
  } finally {
    if (decompressor != null) {
      CodecPool.returnDecompressor(decompressor);
      decompressor = null;
    }
  }
}
public Compressor getCompressor() { CompressionCodec codec = getCodec(conf); if (codec != null) { Compressor compressor = CodecPool.getCompressor(codec); if (LOG.isTraceEnabled()) LOG.trace("Retrieved compressor " + compressor + " from pool."); if (compressor != null) { if (compressor.finished()) { // Somebody returns the compressor to CodecPool but is still using it. LOG.warn("Compressor obtained from CodecPool is already finished()"); } compressor.reset(); } return compressor; } return null; }
public Compressor getCompressor() { CompressionCodec codec = getCodec(conf); if (codec != null) { Compressor compressor = CodecPool.getCompressor(codec); if (compressor != null) { if (compressor.finished()) { // Somebody returns the compressor to CodecPool but is still using // it. LOG.warn("Compressor obtained from CodecPool is already finished()"); // throw new AssertionError( // "Compressor obtained from CodecPool is already finished()"); } compressor.reset(); } return compressor; } return null; }
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE); start = split.getStart(); end = start + split.getLength(); final Path file = split.getPath(); // open the file and seek to the start of the split final FileSystem fs = file.getFileSystem(job); fileIn = fs.open(file); CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file); if (null != codec) { isCompressedInput = true; decompressor = CodecPool.getDecompressor(codec); if (codec instanceof SplittableCompressionCodec) { final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec) .createInputStream( fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK); in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes); start = cIn.getAdjustedStart(); end = cIn.getAdjustedEnd(); filePosition = cIn; } else { in = new SplitLineReader( codec.createInputStream(fileIn, decompressor), job, this.recordDelimiterBytes); filePosition = fileIn; } } else { fileIn.seek(start); in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes); filePosition = fileIn; } // If this is not the first split, we always throw away first record // because we always (except the last split) read one extra line in // next() method. if (start != 0) { start += in.readLine(new Text(), 0, maxBytesToConsume(start)); } this.pos = start; }
public Fetcher( Configuration job, ShuffleScheduler scheduler, MergeManager merger, ShuffleClientMetrics metrics, Shuffle shuffle, SecretKey jobTokenSecret, TezInputContext inputContext) throws IOException { this.job = job; this.scheduler = scheduler; this.merger = merger; this.metrics = metrics; this.shuffle = shuffle; this.id = ++nextId; this.jobTokenSecret = jobTokenSecret; ioErrs = inputContext .getCounters() .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.IO_ERROR.toString()); wrongLengthErrs = inputContext .getCounters() .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_LENGTH.toString()); badIdErrs = inputContext .getCounters() .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.BAD_ID.toString()); wrongMapErrs = inputContext .getCounters() .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_MAP.toString()); connectionErrs = inputContext .getCounters() .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.CONNECTION.toString()); wrongReduceErrs = inputContext .getCounters() .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_REDUCE.toString()); if (ConfigUtils.isIntermediateInputCompressed(job)) { Class<? 
extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(job, DefaultCodec.class); codec = ReflectionUtils.newInstance(codecClass, job); decompressor = CodecPool.getDecompressor(codec); } else { codec = null; decompressor = null; } this.connectionTimeout = job.getInt( TezJobConfig.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT, TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_STALLED_COPY_TIMEOUT); this.readTimeout = job.getInt( TezJobConfig.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT); setName("fetcher#" + id); setDaemon(true); synchronized (Fetcher.class) { sslShuffle = job.getBoolean( TezJobConfig.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL, TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_ENABLE_SSL); if (sslShuffle && sslFactory == null) { sslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, job); try { sslFactory.init(); } catch (Exception ex) { sslFactory.destroy(); throw new RuntimeException(ex); } } } }
/**
 * Gives a compressor back to the {@link CodecPool}, tracing the hand-back
 * when trace logging is enabled.
 *
 * @param compressor the compressor to return; {@code null} is ignored
 */
public void returnCompressor(Compressor compressor) {
  if (compressor == null) {
    return;
  }

  if (LOG.isTraceEnabled()) {
    LOG.trace("Returning compressor " + compressor + " to pool.");
  }
  CodecPool.returnCompressor(compressor);
}
/**
 * Gives a decompressor back to the {@link CodecPool}.
 *
 * @param decompressor the decompressor to return; {@code null} is ignored
 */
public void returnDecompressor(Decompressor decompressor) {
  if (decompressor == null) {
    return;
  }
  CodecPool.returnDecompressor(decompressor);
}
/**
 * Gives a compressor back to the {@link CodecPool}.
 *
 * @param compressor the compressor to return; {@code null} is ignored
 */
public void returnCompressor(Compressor compressor) {
  if (compressor == null) {
    return;
  }
  CodecPool.returnCompressor(compressor);
}
@Test public void testHadoop20JHParser() throws Exception { // Disabled if (true) return; final Configuration conf = new Configuration(); final FileSystem lfs = FileSystem.getLocal(conf); boolean success = false; final Path rootInputDir = new Path(System.getProperty("test.tools.input.dir", "")).makeQualified(lfs); final Path rootTempDir = new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(lfs); final Path rootInputPath = new Path(rootInputDir, "rumen/small-trace-test"); final Path tempDir = new Path(rootTempDir, "TestHadoop20JHParser"); lfs.delete(tempDir, true); final Path inputPath = new Path(rootInputPath, "v20-single-input-log.gz"); final Path goldPath = new Path(rootInputPath, "v20-single-input-log-event-classes.text.gz"); InputStream inputLogStream = new PossiblyDecompressedInputStream(inputPath, conf); InputStream inputGoldStream = new PossiblyDecompressedInputStream(goldPath, conf); BufferedInputStream bis = new BufferedInputStream(inputLogStream); bis.mark(10000); Hadoop20JHParser parser = new Hadoop20JHParser(bis); final Path resultPath = new Path(tempDir, "result.text"); System.out.println("testHadoop20JHParser sent its output to " + resultPath); Compressor compressor; FileSystem fs = resultPath.getFileSystem(conf); CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(resultPath); OutputStream output; if (codec != null) { compressor = CodecPool.getCompressor(codec); output = codec.createOutputStream(fs.create(resultPath), compressor); } else { output = fs.create(resultPath); } PrintStream printStream = new PrintStream(output); try { assertEquals( "Hadoop20JHParser can't parse the test file", true, Hadoop20JHParser.canParse(inputLogStream)); bis.reset(); HistoryEvent event = parser.nextEvent(); while (event != null) { printStream.println(event.getClass().getCanonicalName()); event = parser.nextEvent(); } printStream.close(); LineReader goldLines = new LineReader(inputGoldStream); LineReader resultLines = new 
LineReader(new PossiblyDecompressedInputStream(resultPath, conf)); int lineNumber = 1; try { Text goldLine = new Text(); Text resultLine = new Text(); int goldRead = goldLines.readLine(goldLine); int resultRead = resultLines.readLine(resultLine); while (goldRead * resultRead != 0) { if (!goldLine.equals(resultLine)) { assertEquals("Type mismatch detected", goldLine, resultLine); break; } goldRead = goldLines.readLine(goldLine); resultRead = resultLines.readLine(resultLine); ++lineNumber; } if (goldRead != resultRead) { assertEquals( "the " + (goldRead > resultRead ? "gold" : resultRead) + " file contains more text at line " + lineNumber, goldRead, resultRead); } success = true; } finally { goldLines.close(); resultLines.close(); if (success) { lfs.delete(resultPath, false); } } } finally { if (parser == null) { inputLogStream.close(); } else { if (parser != null) { parser.close(); } } if (inputGoldStream != null) { inputGoldStream.close(); } // it's okay to do this twice [if we get an error on input] printStream.close(); } }