/** * Test if {@link RandomTextDataGenerator} can generate random text data with the desired * compression ratio. This involves - using {@link CompressionEmulationUtil} to configure the MR * job for generating the random text data with the desired compression ratio - running the MR job * - test {@link RandomTextDataGenerator}'s output and match the output size (compressed) with the * expected compression ratio. */ private void testCompressionRatioConfigure(float ratio) throws Exception { long dataSize = 10 * 1024 * 1024; Configuration conf = new Configuration(); CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true); conf.setLong(GenerateData.GRIDMIX_GEN_BYTES, dataSize); float expectedRatio = CompressionEmulationUtil.DEFAULT_COMPRESSION_RATIO; if (ratio > 0) { // set the compression ratio in the conf CompressionEmulationUtil.setMapInputCompressionEmulationRatio(conf, ratio); expectedRatio = CompressionEmulationUtil.standardizeCompressionRatio(ratio); } // invoke the utility to map from ratio to word-size CompressionEmulationUtil.setupDataGeneratorConfig(conf); FileSystem lfs = FileSystem.getLocal(conf); // define the test's root temp directory Path rootTempDir = new Path(System.getProperty("test.build.data", "/tmp")) .makeQualified(lfs.getUri(), lfs.getWorkingDirectory()); Path tempDir = new Path(rootTempDir, "TestCustomRandomCompressedTextDataGenr"); lfs.delete(tempDir, true); runDataGenJob(conf, tempDir); // validate the output data FileStatus[] files = lfs.listStatus(tempDir, new Utils.OutputFileUtils.OutputFilesFilter()); long size = 0; for (FileStatus status : files) { size += status.getLen(); } float compressionRatio = ((float) size) / dataSize; float stdRatio = CompressionEmulationUtil.standardizeCompressionRatio(compressionRatio); assertEquals(expectedRatio, stdRatio, 0.0D); }
/** * Write random bytes at the path <inputDir>. * * @see org.apache.hadoop.mapred.gridmix.GenerateData */ protected void writeInputData(long genbytes, Path inputDir) throws IOException, InterruptedException { final Configuration conf = getConf(); // configure the compression ratio if needed CompressionEmulationUtil.setupDataGeneratorConfig(conf); final GenerateData genData = new GenerateData(conf, inputDir, genbytes); LOG.info("Generating " + StringUtils.humanReadableInt(genbytes) + " of test data..."); launchGridmixJob(genData); FsShell shell = new FsShell(conf); try { LOG.info("Changing the permissions for inputPath " + inputDir.toString()); shell.run(new String[] {"-chmod", "-R", "777", inputDir.toString()}); } catch (Exception e) { LOG.error("Couldnt change the file permissions ", e); throw new IOException(e); } LOG.info("Input data generation successful."); }