/** Checks {@code standardizeCompressionRatio} against known input / expected-output pairs. */
 @Test
 public void testCompressionRatioStandardization() throws Exception {
   // each row: { raw ratio, expected standardized ratio }
   float[][] cases = {
     {0.55F, 0.55F},
     {0.652F, 0.65F},
     {0.777F, 0.78F},
     {0.855F, 0.86F},
   };

   for (float[] row : cases) {
     float standardized = CompressionEmulationUtil.standardizeCompressionRatio(row[0]);
     // exact match is required (zero delta)
     assertEquals(row[1], standardized, 0.0D);
   }
 }
  /**
   * Test compressible {@link GridmixRecord}.
   *
   * <p>Writes a 10 MB record with compressibility enabled through the stream returned by {@code
   * getPossiblyCompressedOutputStream} (gzip codec configured), reads it back through {@code
   * getPossiblyDecompressedInputStream}, and checks that the record size round-trips and that the
   * file on disk is smaller than the record.
   *
   * @throws IOException if the local file system or either stream fails
   */
  @Test
  public void testCompressibleGridmixRecord() throws IOException {
    JobConf conf = new JobConf();
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);

    FileSystem lfs = FileSystem.getLocal(conf);
    int dataSize = 1024 * 1024 * 10; // 10 MB
    float ratio = 0.357F;

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    // start from a clean directory so stale output from an earlier run cannot be picked up
    Path tempDir = new Path(rootTempDir, "TestPossiblyCompressibleGridmixRecord");
    lfs.delete(tempDir, true);

    // define a compressible GridmixRecord
    GridmixRecord record = new GridmixRecord(dataSize, 0);
    record.setCompressibility(true, ratio); // enable compression

    conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);
    org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);

    // write the record to a file; closing the wrapper in finally also closes the wrapped
    // stream, so the file handle is released even when the write fails
    Path recordFile = new Path(tempDir, "record");
    OutputStream outStream =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(recordFile, conf);
    DataOutputStream out = new DataOutputStream(outStream);
    try {
      record.write(out);
    } finally {
      out.close();
    }

    // the file actually written carries the gzip suffix
    Path actualRecordFile = recordFile.suffix(".gz");

    // get the compressed file size
    long compressedFileSize = lfs.listStatus(actualRecordFile)[0].getLen();

    // open the compressed stream for reading and make sure it is always closed
    GridmixRecord recordRead = new GridmixRecord();
    InputStream in =
        CompressionEmulationUtil.getPossiblyDecompressedInputStream(actualRecordFile, conf, 0);
    try {
      recordRead.readFields(new DataInputStream(in));
    } finally {
      in.close();
    }

    assertEquals(
        "Record size mismatch in a compressible GridmixRecord", dataSize, recordRead.getSize());
    assertTrue(
        "Failed to generate a compressible GridmixRecord",
        recordRead.getSize() > compressedFileSize);

    // check if the record can generate data with the desired compression ratio
    // NOTE(review): a delta of 1.0 accepts any two ratios that differ by at most 1, which makes
    // this check very loose; left unchanged because a tighter tolerance would need tuning - confirm.
    float seenRatio = ((float) compressedFileSize) / dataSize;
    assertEquals(
        CompressionEmulationUtil.standardizeCompressionRatio(ratio),
        CompressionEmulationUtil.standardizeCompressionRatio(seenRatio),
        1.0D);
  }
  /**
   * Checks that {@link RandomTextDataGenerator} can generate random text data with the desired
   * compression ratio. The steps are:
   *
   * <ul>
   *   <li>use {@link CompressionEmulationUtil} to configure the MR job that generates the random
   *       text data with the desired compression ratio,
   *   <li>run the MR job,
   *   <li>sum the (compressed) output file sizes and match the standardized ratio of that total to
   *       the requested byte count against the expected compression ratio.
   * </ul>
   *
   * @param ratio compression ratio to configure; when it is not greater than zero, no ratio is set
   *     in the conf and {@code CompressionEmulationUtil.DEFAULT_COMPRESSION_RATIO} is expected
   * @throws Exception if configuring, running, or inspecting the job output fails
   */
  private void testCompressionRatioConfigure(float ratio) throws Exception {
    final long bytesToGenerate = 10 * 1024 * 1024;

    Configuration conf = new Configuration();
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
    conf.setLong(GenerateData.GRIDMIX_GEN_BYTES, bytesToGenerate);

    final float expectedRatio;
    if (ratio > 0) {
      // an explicit ratio was requested: record it in the conf and expect its standardized form
      CompressionEmulationUtil.setMapInputCompressionEmulationRatio(conf, ratio);
      expectedRatio = CompressionEmulationUtil.standardizeCompressionRatio(ratio);
    } else {
      expectedRatio = CompressionEmulationUtil.DEFAULT_COMPRESSION_RATIO;
    }

    // invoke the utility to map from ratio to word-size
    CompressionEmulationUtil.setupDataGeneratorConfig(conf);

    FileSystem localFs = FileSystem.getLocal(conf);

    // resolve the test's root temp directory against the local file system
    Path buildDataDir = new Path(System.getProperty("test.build.data", "/tmp"));
    Path rootTempDir = buildDataDir.makeQualified(localFs.getUri(), localFs.getWorkingDirectory());

    Path outputDir = new Path(rootTempDir, "TestCustomRandomCompressedTextDataGenr");
    localFs.delete(outputDir, true);

    runDataGenJob(conf, outputDir);

    // add up the sizes of the job's output files
    FileStatus[] outputs =
        localFs.listStatus(outputDir, new Utils.OutputFileUtils.OutputFilesFilter());
    long totalBytes = 0;
    for (int i = 0; i < outputs.length; i++) {
      totalBytes += outputs[i].getLen();
    }

    float observedRatio =
        CompressionEmulationUtil.standardizeCompressionRatio(((float) totalBytes) / bytesToGenerate);

    assertEquals(expectedRatio, observedRatio, 0.0D);
  }