  /**
   * Test compressible {@link GridmixRecord}.
   */
  @Test
  public void testCompressibleGridmixRecord() throws IOException {
    JobConf conf = new JobConf();
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);

    FileSystem lfs = FileSystem.getLocal(conf);

    int dataSize = 1024 * 1024 * 10; // 10 MB
    float ratio = 0.357F;

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir =
        new Path(rootTempDir, "TestPossiblyCompressibleGridmixRecord");
    lfs.delete(tempDir, true);

    // define a compressible GridmixRecord
    GridmixRecord record = new GridmixRecord(dataSize, 0);
    record.setCompressibility(true, ratio); // enable compression

    conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class,
        CompressionCodec.class);
    org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);

    // write the record to a file
    Path recordFile = new Path(tempDir, "record");
    OutputStream outStream =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(recordFile,
            conf);
    DataOutputStream out = new DataOutputStream(outStream);
    record.write(out);
    out.close();
    outStream.close();

    // open the compressed stream for reading
    Path actualRecordFile = recordFile.suffix(".gz");
    InputStream in =
        CompressionEmulationUtil.getPossiblyDecompressedInputStream(
            actualRecordFile, conf, 0);

    // get the compressed file size
    long compressedFileSize = lfs.listStatus(actualRecordFile)[0].getLen();

    GridmixRecord recordRead = new GridmixRecord();
    recordRead.readFields(new DataInputStream(in));

    assertEquals("Record size mismatch in a compressible GridmixRecord",
        dataSize, recordRead.getSize());
    assertTrue("Failed to generate a compressible GridmixRecord",
        recordRead.getSize() > compressedFileSize);

    // check if the record can generate data with the desired compression ratio
    float seenRatio = ((float) compressedFileSize) / dataSize;
    assertEquals(CompressionEmulationUtil.standardizeCompressionRatio(ratio),
        CompressionEmulationUtil.standardizeCompressionRatio(seenRatio), 1.0D);
  }
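  // A minimal companion sketch, not part of the original suite: it checks
  // that a GridmixRecord round-trips through a plain (uncompressed) local
  // stream, isolating the serialization path from the compression path
  // exercised above. The method name and temp-dir name are illustrative
  // assumptions; all APIs used are ones already exercised in this file.
  @Test
  public void testGridmixRecordPlainRoundTrip() throws IOException {
    JobConf conf = new JobConf();
    FileSystem lfs = FileSystem.getLocal(conf);

    int dataSize = 1024 * 1024; // 1 MB

    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
    Path tempDir = new Path(rootTempDir, "TestGridmixRecordPlainRoundTrip");
    lfs.delete(tempDir, true);

    // write the record through an uncompressed local stream
    GridmixRecord record = new GridmixRecord(dataSize, 0);
    Path recordFile = new Path(tempDir, "plain-record");
    DataOutputStream out = lfs.create(recordFile);
    record.write(out);
    out.close();

    // read it back and verify the advertised size survives the round trip
    GridmixRecord recordRead = new GridmixRecord();
    DataInputStream in = lfs.open(recordFile);
    recordRead.readFields(in);
    in.close();

    assertEquals("Record size mismatch in a plain GridmixRecord",
        dataSize, recordRead.getSize());
  }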
  /**
   * Test whether {@link FileQueue} can identify a compressed file and provide
   * readers that extract uncompressed data only if input-compression is
   * enabled.
   */
  @Test
  public void testFileQueueDecompression() throws IOException {
    JobConf conf = new JobConf();
    FileSystem lfs = FileSystem.getLocal(conf);
    String inputLine = "Hi Hello!";

    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
    org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);
    org.apache.hadoop.mapred.FileOutputFormat.setOutputCompressorClass(conf,
        GzipCodec.class);

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestFileQueueDecompression");
    lfs.delete(tempDir, true);

    // create a compressed file
    Path compressedFile = new Path(tempDir, "test");
    OutputStream out =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(
            compressedFile, conf);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
    writer.write(inputLine);
    writer.close();

    compressedFile = compressedFile.suffix(".gz");

    // now read back the data from the compressed stream using FileQueue
    long fileSize = lfs.listStatus(compressedFile)[0].getLen();
    CombineFileSplit split =
        new CombineFileSplit(new Path[] {compressedFile},
            new long[] {fileSize});
    FileQueue queue = new FileQueue(split, conf);
    byte[] bytes = new byte[inputLine.getBytes().length];
    queue.read(bytes);
    queue.close();
    String readLine = new String(bytes);
    assertEquals("Compression/Decompression error", inputLine, readLine);
  }
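  // A hedged variant sketch, not part of the original suite: it feeds
  // FileQueue a CombineFileSplit spanning two compressed files, assuming the
  // queue walks the split members in order (FileQueue reads circularly
  // through each input source), so reading exactly the combined payload
  // length should yield the two lines back to back. Method, directory, and
  // file names are illustrative assumptions; the APIs are the ones already
  // used by testFileQueueDecompression().
  @Test
  public void testFileQueueDecompressionWithMultipleFiles()
      throws IOException {
    JobConf conf = new JobConf();
    FileSystem lfs = FileSystem.getLocal(conf);
    String firstLine = "Hi ";
    String secondLine = "Hello!";

    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
    org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);
    org.apache.hadoop.mapred.FileOutputFormat.setOutputCompressorClass(conf,
        GzipCodec.class);

    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
    Path tempDir =
        new Path(rootTempDir, "TestFileQueueDecompressionMultiFile");
    lfs.delete(tempDir, true);

    // create two compressed files, one per input line
    Path firstFile = new Path(tempDir, "test1");
    OutputStream out =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(firstFile,
            conf);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
    writer.write(firstLine);
    writer.close();
    firstFile = firstFile.suffix(".gz");

    Path secondFile = new Path(tempDir, "test2");
    out = CompressionEmulationUtil.getPossiblyCompressedOutputStream(
        secondFile, conf);
    writer = new BufferedWriter(new OutputStreamWriter(out));
    writer.write(secondLine);
    writer.close();
    secondFile = secondFile.suffix(".gz");

    // read both files back through a single FileQueue
    long firstSize = lfs.listStatus(firstFile)[0].getLen();
    long secondSize = lfs.listStatus(secondFile)[0].getLen();
    CombineFileSplit split =
        new CombineFileSplit(new Path[] {firstFile, secondFile},
            new long[] {firstSize, secondSize});
    FileQueue queue = new FileQueue(split, conf);
    byte[] bytes =
        new byte[firstLine.getBytes().length
            + secondLine.getBytes().length];
    queue.read(bytes);
    queue.close();
    assertEquals("Compression/Decompression error", firstLine + secondLine,
        new String(bytes));
  }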