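/**
   * Tests whether {@link RandomTextDataGenerator} can generate random text data with the desired
   * compression ratio. This involves:
   *
   * <ul>
   *   <li>using {@link CompressionEmulationUtil} to configure the MR job for generating the random
   *       text data with the desired compression ratio,
   *   <li>running the MR job, and
   *   <li>checking {@link RandomTextDataGenerator}'s output by matching the (compressed) output
   *       size against the expected compression ratio.
   * </ul>
   *
   * @param ratio the compression ratio to configure; if it is not positive, no ratio is set and
   *     {@link CompressionEmulationUtil#DEFAULT_COMPRESSION_RATIO} is expected instead
   */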
  private void testCompressionRatioConfigure(float ratio) throws Exception {
    // Amount of data the generator job is asked to produce; also the denominator of the
    // observed ratio computed at the end.
    final long requestedBytes = 10 * 1024 * 1024;

    // Enable compression emulation, including emulation for the input data.
    final Configuration jobConf = new Configuration();
    CompressionEmulationUtil.setCompressionEmulationEnabled(jobConf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(jobConf, true);
    jobConf.setLong(GenerateData.GRIDMIX_GEN_BYTES, requestedBytes);

    // A positive ratio is written into the conf and its standardized form is what we expect
    // back; otherwise nothing is configured and the default ratio is expected.
    final float expectedRatio;
    if (ratio > 0) {
      CompressionEmulationUtil.setMapInputCompressionEmulationRatio(jobConf, ratio);
      expectedRatio = CompressionEmulationUtil.standardizeCompressionRatio(ratio);
    } else {
      expectedRatio = CompressionEmulationUtil.DEFAULT_COMPRESSION_RATIO;
    }

    // Let the utility map the configured ratio to the generator's word-size setting.
    CompressionEmulationUtil.setupDataGeneratorConfig(jobConf);

    final FileSystem localFs = FileSystem.getLocal(jobConf);

    // Resolve the test's root temp directory against the local file system.
    final Path buildDataDir = new Path(System.getProperty("test.build.data", "/tmp"));
    final Path qualifiedRoot =
        buildDataDir.makeQualified(localFs.getUri(), localFs.getWorkingDirectory());

    // Start from a clean output directory so earlier runs cannot skew the size check.
    final Path outputDir = new Path(qualifiedRoot, "TestCustomRandomCompressedTextDataGenr");
    localFs.delete(outputDir, true);

    runDataGenJob(jobConf, outputDir);

    // Add up the lengths of the job's output files only (the filter drops everything else).
    final FileStatus[] outputFiles =
        localFs.listStatus(outputDir, new Utils.OutputFileUtils.OutputFilesFilter());
    long totalBytes = 0;
    for (int i = 0; i < outputFiles.length; i++) {
      totalBytes += outputFiles[i].getLen();
    }

    // Compare standardized ratios so the match can be exact (zero tolerance).
    final float observedRatio =
        CompressionEmulationUtil.standardizeCompressionRatio(((float) totalBytes) / requestedBytes);

    assertEquals(expectedRatio, observedRatio, 0.0D);
  }
Example #2
0
  /**
   * Writes random bytes at the path {@code inputDir}.
   *
   * @see org.apache.hadoop.mapred.gridmix.GenerateData
   */
  protected void writeInputData(long genbytes, Path inputDir)
      throws IOException, InterruptedException {
    final Configuration conf = getConf();

    // configure the compression ratio if needed
    CompressionEmulationUtil.setupDataGeneratorConfig(conf);

    // Build the data-generation job for the requested number of bytes and run it.
    final GenerateData genData = new GenerateData(conf, inputDir, genbytes);
    LOG.info("Generating " + StringUtils.humanReadableInt(genbytes) + " of test data...");
    launchGridmixJob(genData);

    // Recursively chmod the generated data to 777 through the FS shell.
    final FsShell shell = new FsShell(conf);
    int exitCode;
    try {
      LOG.info("Changing the permissions for inputPath " + inputDir.toString());
      exitCode = shell.run(new String[] {"-chmod", "-R", "777", inputDir.toString()});
    } catch (Exception e) {
      // An exception from the shell is treated as fatal and surfaced as an IOException.
      LOG.error("Couldnt change the file permissions ", e);
      throw new IOException(e);
    }

    // The shell also reports failures through its exit code rather than by throwing, so a
    // non-zero result must not pass silently. The data itself was generated, so this is only
    // logged and the method continues as before.
    if (exitCode != 0) {
      LOG.warn("Changing the permissions for inputPath " + inputDir
          + " returned non-zero exit code " + exitCode);
    }

    LOG.info("Input data generation successful.");
  }