/** * Test {@link CompressionEmulationUtil#isCompressionEmulationEnabled( * org.apache.hadoop.conf.Configuration)}. */ @Test public void testIsCompressionEmulationEnabled() { Configuration conf = new Configuration(); // Check default values assertTrue(CompressionEmulationUtil.isCompressionEmulationEnabled(conf)); // Check disabled CompressionEmulationUtil.setCompressionEmulationEnabled(conf, false); assertFalse(CompressionEmulationUtil.isCompressionEmulationEnabled(conf)); // Check enabled CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); assertTrue(CompressionEmulationUtil.isCompressionEmulationEnabled(conf)); }
/** Test compressible {@link GridmixRecord}. */ @Test public void testCompressibleGridmixRecord() throws IOException { JobConf conf = new JobConf(); CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true); FileSystem lfs = FileSystem.getLocal(conf); int dataSize = 1024 * 1024 * 10; // 10 MB float ratio = 0.357F; // define the test's root temp directory Path rootTempDir = new Path(System.getProperty("test.build.data", "/tmp")) .makeQualified(lfs.getUri(), lfs.getWorkingDirectory()); Path tempDir = new Path(rootTempDir, "TestPossiblyCompressibleGridmixRecord"); lfs.delete(tempDir, true); // define a compressible GridmixRecord GridmixRecord record = new GridmixRecord(dataSize, 0); record.setCompressibility(true, ratio); // enable compression conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class); org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true); // write the record to a file Path recordFile = new Path(tempDir, "record"); OutputStream outStream = CompressionEmulationUtil.getPossiblyCompressedOutputStream(recordFile, conf); DataOutputStream out = new DataOutputStream(outStream); record.write(out); out.close(); outStream.close(); // open the compressed stream for reading Path actualRecordFile = recordFile.suffix(".gz"); InputStream in = CompressionEmulationUtil.getPossiblyDecompressedInputStream(actualRecordFile, conf, 0); // get the compressed file size long compressedFileSize = lfs.listStatus(actualRecordFile)[0].getLen(); GridmixRecord recordRead = new GridmixRecord(); recordRead.readFields(new DataInputStream(in)); assertEquals( "Record size mismatch in a compressible GridmixRecord", dataSize, recordRead.getSize()); assertTrue( "Failed to generate a compressible GridmixRecord", recordRead.getSize() > compressedFileSize); // check if the record can generate data with the desired compression ratio float seenRatio = ((float) compressedFileSize) / dataSize; assertEquals( CompressionEmulationUtil.standardizeCompressionRatio(ratio), CompressionEmulationUtil.standardizeCompressionRatio(seenRatio), 1.0D); }
/** Test {@link RandomTextDataMapper} via {@link CompressionEmulationUtil}. */ @Test public void testRandomCompressedTextDataGenerator() throws Exception { int wordSize = 10; int listSize = 20; long dataSize = 10 * 1024 * 1024; Configuration conf = new Configuration(); CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true); // configure the RandomTextDataGenerator to generate desired sized data conf.setInt(RandomTextDataGenerator.GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE, listSize); conf.setInt(RandomTextDataGenerator.GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE, wordSize); conf.setLong(GenerateData.GRIDMIX_GEN_BYTES, dataSize); FileSystem lfs = FileSystem.getLocal(conf); // define the test's root temp directory Path rootTempDir = new Path(System.getProperty("test.build.data", "/tmp")) .makeQualified(lfs.getUri(), lfs.getWorkingDirectory()); Path tempDir = new Path(rootTempDir, "TestRandomCompressedTextDataGenr"); lfs.delete(tempDir, true); runDataGenJob(conf, tempDir); // validate the output data FileStatus[] files = lfs.listStatus(tempDir, new Utils.OutputFileUtils.OutputFilesFilter()); long size = 0; long maxLineSize = 0; for (FileStatus status : files) { InputStream in = CompressionEmulationUtil.getPossiblyDecompressedInputStream(status.getPath(), conf, 0); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); String line = reader.readLine(); if (line != null) { long lineSize = line.getBytes().length; if (lineSize > maxLineSize) { maxLineSize = lineSize; } while (line != null) { for (String word : line.split("\\s")) { size += word.getBytes().length; } line = reader.readLine(); } } reader.close(); } assertTrue(size >= dataSize); assertTrue(size <= dataSize + maxLineSize); }
/** * Test if {@link RandomTextDataGenerator} can generate random text data with the desired * compression ratio. This involves - using {@link CompressionEmulationUtil} to configure the MR * job for generating the random text data with the desired compression ratio - running the MR job * - test {@link RandomTextDataGenerator}'s output and match the output size (compressed) with the * expected compression ratio. */ private void testCompressionRatioConfigure(float ratio) throws Exception { long dataSize = 10 * 1024 * 1024; Configuration conf = new Configuration(); CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true); conf.setLong(GenerateData.GRIDMIX_GEN_BYTES, dataSize); float expectedRatio = CompressionEmulationUtil.DEFAULT_COMPRESSION_RATIO; if (ratio > 0) { // set the compression ratio in the conf CompressionEmulationUtil.setMapInputCompressionEmulationRatio(conf, ratio); expectedRatio = CompressionEmulationUtil.standardizeCompressionRatio(ratio); } // invoke the utility to map from ratio to word-size CompressionEmulationUtil.setupDataGeneratorConfig(conf); FileSystem lfs = FileSystem.getLocal(conf); // define the test's root temp directory Path rootTempDir = new Path(System.getProperty("test.build.data", "/tmp")) .makeQualified(lfs.getUri(), lfs.getWorkingDirectory()); Path tempDir = new Path(rootTempDir, "TestCustomRandomCompressedTextDataGenr"); lfs.delete(tempDir, true); runDataGenJob(conf, tempDir); // validate the output data FileStatus[] files = lfs.listStatus(tempDir, new Utils.OutputFileUtils.OutputFilesFilter()); long size = 0; for (FileStatus status : files) { size += status.getLen(); } float compressionRatio = ((float) size) / dataSize; float stdRatio = CompressionEmulationUtil.standardizeCompressionRatio(compressionRatio); assertEquals(expectedRatio, stdRatio, 0.0D); }
/** * Test of {@link FileQueue} can identify compressed file and provide readers to extract * uncompressed data only if input-compression is enabled. */ @Test public void testFileQueueDecompression() throws IOException { JobConf conf = new JobConf(); FileSystem lfs = FileSystem.getLocal(conf); String inputLine = "Hi Hello!"; CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true); org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true); org.apache.hadoop.mapred.FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class); // define the test's root temp directory Path rootTempDir = new Path(System.getProperty("test.build.data", "/tmp")) .makeQualified(lfs.getUri(), lfs.getWorkingDirectory()); Path tempDir = new Path(rootTempDir, "TestFileQueueDecompression"); lfs.delete(tempDir, true); // create a compressed file Path compressedFile = new Path(tempDir, "test"); OutputStream out = CompressionEmulationUtil.getPossiblyCompressedOutputStream(compressedFile, conf); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out)); writer.write(inputLine); writer.close(); compressedFile = compressedFile.suffix(".gz"); // now read back the data from the compressed stream using FileQueue long fileSize = lfs.listStatus(compressedFile)[0].getLen(); CombineFileSplit split = new CombineFileSplit(new Path[] {compressedFile}, new long[] {fileSize}); FileQueue queue = new FileQueue(split, conf); byte[] bytes = new byte[inputLine.getBytes().length]; queue.read(bytes); queue.close(); String readLine = new String(bytes); assertEquals("Compression/Decompression error", inputLine, readLine); }
/** * Test {@link CompressionEmulationUtil#getPossiblyDecompressedInputStream(Path, Configuration, * long)} and {@link CompressionEmulationUtil#getPossiblyCompressedOutputStream(Path, * Configuration)}. */ @Test public void testPossiblyCompressedDecompressedStreams() throws IOException { JobConf conf = new JobConf(); FileSystem lfs = FileSystem.getLocal(conf); String inputLine = "Hi Hello!"; CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true); conf.setBoolean(FileOutputFormat.COMPRESS, true); conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class); // define the test's root temp directory Path rootTempDir = new Path(System.getProperty("test.build.data", "/tmp")) .makeQualified(lfs.getUri(), lfs.getWorkingDirectory()); Path tempDir = new Path(rootTempDir, "TestPossiblyCompressedDecompressedStreams"); lfs.delete(tempDir, true); // create a compressed file Path compressedFile = new Path(tempDir, "test"); OutputStream out = CompressionEmulationUtil.getPossiblyCompressedOutputStream(compressedFile, conf); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out)); writer.write(inputLine); writer.close(); // now read back the data from the compressed stream compressedFile = compressedFile.suffix(".gz"); InputStream in = CompressionEmulationUtil.getPossiblyDecompressedInputStream(compressedFile, conf, 0); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); String readLine = reader.readLine(); assertEquals("Compression/Decompression error", inputLine, readLine); reader.close(); }