Code Example #1
 /** Test map output compression ratio configuration utilities. */
 @Test
 public void testIntermediateCompressionRatioConfiguration() throws Exception {
   Configuration conf = new Configuration();
   float ratio = 0.567F;
   CompressionEmulationUtil.setMapOutputCompressionEmulationRatio(conf, ratio);
   assertEquals(ratio, CompressionEmulationUtil.getMapOutputCompressionEmulationRatio(conf), 0.0D);
 }
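The setter and getter under test are presumably thin wrappers around Configuration.setFloat/getFloat. A minimal sketch, assuming a hypothetical configuration key and default value (the actual Gridmix constant names may differ):

 // Sketch only: the key name and default below are assumptions, not the
 // actual Gridmix constants.
 private static final String MAP_OUTPUT_COMPRESSION_RATIO =
     "gridmix.compression-emulation.map-output.ratio";
 private static final float DEFAULT_RATIO = 0.5F;

 static void setMapOutputCompressionEmulationRatio(Configuration conf, float ratio) {
   conf.setFloat(MAP_OUTPUT_COMPRESSION_RATIO, ratio);
 }

 static float getMapOutputCompressionEmulationRatio(Configuration conf) {
   return conf.getFloat(MAP_OUTPUT_COMPRESSION_RATIO, DEFAULT_RATIO);
 }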
Code Example #2
 /** Test compression ratio standardization. */
 @Test
 public void testCompressionRatioStandardization() throws Exception {
   assertEquals(0.55F, CompressionEmulationUtil.standardizeCompressionRatio(0.55F), 0.0D);
   assertEquals(0.65F, CompressionEmulationUtil.standardizeCompressionRatio(0.652F), 0.0D);
   assertEquals(0.78F, CompressionEmulationUtil.standardizeCompressionRatio(0.777F), 0.0D);
   assertEquals(0.86F, CompressionEmulationUtil.standardizeCompressionRatio(0.855F), 0.0D);
 }
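The expected values above are consistent with "standardization" meaning rounding the ratio to two decimal places. A minimal sketch of such a routine, inferred from the test expectations rather than taken from the actual Gridmix implementation:

 // Sketch: round to two decimal places, e.g. 0.652F -> 0.65F, 0.777F -> 0.78F.
 static float standardizeCompressionRatio(float ratio) {
   int percent = Math.round(ratio * 100.0F);
   return ((float) percent) / 100.0F;
 }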
Code Example #3
  /** Test {@link RandomTextDataMapper} via {@link CompressionEmulationUtil}. */
  @Test
  public void testRandomCompressedTextDataGenerator() throws Exception {
    int wordSize = 10;
    int listSize = 20;
    long dataSize = 10 * 1024 * 1024;

    Configuration conf = new Configuration();
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);

    // configure the RandomTextDataGenerator to generate desired sized data
    conf.setInt(RandomTextDataGenerator.GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE, listSize);
    conf.setInt(RandomTextDataGenerator.GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE, wordSize);
    conf.setLong(GenerateData.GRIDMIX_GEN_BYTES, dataSize);

    FileSystem lfs = FileSystem.getLocal(conf);

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestRandomCompressedTextDataGenr");
    lfs.delete(tempDir, true);

    runDataGenJob(conf, tempDir);

    // validate the output data
    FileStatus[] files = lfs.listStatus(tempDir, new Utils.OutputFileUtils.OutputFilesFilter());
    long size = 0;
    long maxLineSize = 0;

    for (FileStatus status : files) {
      InputStream in =
          CompressionEmulationUtil.getPossiblyDecompressedInputStream(status.getPath(), conf, 0);
      BufferedReader reader = new BufferedReader(new InputStreamReader(in));
      String line = reader.readLine();
      if (line != null) {
        long lineSize = line.getBytes().length;
        if (lineSize > maxLineSize) {
          maxLineSize = lineSize;
        }
        while (line != null) {
          for (String word : line.split("\\s")) {
            size += word.getBytes().length;
          }
          line = reader.readLine();
        }
      }
      reader.close();
    }

    // the generator writes whole lines until the requested byte count is
    // reached, so the total may overshoot dataSize by at most one line
    assertTrue(size >= dataSize);
    assertTrue(size <= dataSize + maxLineSize);
  }
Code Example #4
  /**
   * Test {@link CompressionEmulationUtil#isCompressionEmulationEnabled(
   * org.apache.hadoop.conf.Configuration)}.
   */
  @Test
  public void testIsCompressionEmulationEnabled() {
    Configuration conf = new Configuration();
    // Check default values
    assertTrue(CompressionEmulationUtil.isCompressionEmulationEnabled(conf));

    // Check disabled
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, false);
    assertFalse(CompressionEmulationUtil.isCompressionEmulationEnabled(conf));

    // Check enabled
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    assertTrue(CompressionEmulationUtil.isCompressionEmulationEnabled(conf));
  }
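Consistent with the assertions above, the flag is presumably a single boolean configuration key that defaults to true. A minimal sketch, with the key name being an assumption:

  // Sketch only: the key name is an assumption; defaulting to true matches
  // the first assertion in the test above.
  private static final String COMPRESSION_EMULATION_ENABLE =
      "gridmix.compression-emulation.enable";

  static boolean isCompressionEmulationEnabled(Configuration conf) {
    return conf.getBoolean(COMPRESSION_EMULATION_ENABLE, true);
  }

  static void setCompressionEmulationEnabled(Configuration conf, boolean enable) {
    conf.setBoolean(COMPRESSION_EMULATION_ENABLE, enable);
  }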
Code Example #5
  /**
   * Test if {@link CompressionEmulationUtil#configureCompressionEmulation(
   * org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.JobConf)} can extract compression
   * related configuration parameters.
   */
  @Test
  public void testExtractCompressionConfigs() {
    JobConf source = new JobConf();
    JobConf target = new JobConf();

    // set the default values
    source.setBoolean(FileOutputFormat.COMPRESS, false);
    source.set(FileOutputFormat.COMPRESS_CODEC, "MyDefaultCodec");
    source.set(FileOutputFormat.COMPRESS_TYPE, "MyDefaultType");
    source.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, false);
    source.set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, "MyDefaultCodec2");

    CompressionEmulationUtil.configureCompressionEmulation(source, target);

    // check default values
    assertFalse(target.getBoolean(FileOutputFormat.COMPRESS, true));
    assertEquals("MyDefaultCodec", target.get(FileOutputFormat.COMPRESS_CODEC));
    assertEquals("MyDefaultType", target.get(FileOutputFormat.COMPRESS_TYPE));
    assertFalse(target.getBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true));
    assertEquals("MyDefaultCodec2", target.get(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC));
    assertFalse(CompressionEmulationUtil.isInputCompressionEmulationEnabled(target));

    // set new values
    source.setBoolean(FileOutputFormat.COMPRESS, true);
    source.set(FileOutputFormat.COMPRESS_CODEC, "MyCodec");
    source.set(FileOutputFormat.COMPRESS_TYPE, "MyType");
    source.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
    source.set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, "MyCodec2");
    org.apache.hadoop.mapred.FileInputFormat.setInputPaths(source, "file.gz");

    target = new JobConf(); // reset
    CompressionEmulationUtil.configureCompressionEmulation(source, target);

    // check new values
    assertTrue(target.getBoolean(FileOutputFormat.COMPRESS, false));
    assertEquals("MyCodec", target.get(FileOutputFormat.COMPRESS_CODEC));
    assertEquals("MyType", target.get(FileOutputFormat.COMPRESS_TYPE));
    assertTrue(target.getBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, false));
    assertEquals("MyCodec2", target.get(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC));
    assertTrue(CompressionEmulationUtil.isInputCompressionEmulationEnabled(target));
  }
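The test implies that configureCompressionEmulation mirrors the compression-related keys of the original (source) job into the simulated (target) job, and enables input-compression emulation when an input path carries a known codec extension such as ".gz". A minimal sketch for two of the keys, as an assumption rather than the actual implementation:

  // Sketch, under assumption: copy the trace job's compression settings into
  // the simulated job's configuration.
  static void configureCompressionEmulation(JobConf source, JobConf target) {
    target.setBoolean(FileOutputFormat.COMPRESS,
        source.getBoolean(FileOutputFormat.COMPRESS, false));
    String codec = source.get(FileOutputFormat.COMPRESS_CODEC);
    if (codec != null) {
      target.set(FileOutputFormat.COMPRESS_CODEC, codec);
    }
    // the COMPRESS_TYPE and MAP_OUTPUT_COMPRESS* keys would be mirrored the
    // same way; input-compression emulation would be switched on when an
    // input path ends in a known codec extension such as ".gz"
  }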
Code Example #6
  /**
   * Test if {@link FileQueue} can identify a compressed file and provide readers that extract
   * the uncompressed data, but only when input-compression emulation is enabled.
   */
  @Test
  public void testFileQueueDecompression() throws IOException {
    JobConf conf = new JobConf();
    FileSystem lfs = FileSystem.getLocal(conf);
    String inputLine = "Hi Hello!";

    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
    org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);
    org.apache.hadoop.mapred.FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestFileQueueDecompression");
    lfs.delete(tempDir, true);

    // create a compressed file
    Path compressedFile = new Path(tempDir, "test");
    OutputStream out =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(compressedFile, conf);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
    writer.write(inputLine);
    writer.close();

    // the gzip codec appends its default ".gz" extension to the file it
    // writes, so adjust the path accordingly
    compressedFile = compressedFile.suffix(".gz");
    // now read back the data from the compressed stream using FileQueue
    long fileSize = lfs.listStatus(compressedFile)[0].getLen();
    CombineFileSplit split =
        new CombineFileSplit(new Path[] {compressedFile}, new long[] {fileSize});
    FileQueue queue = new FileQueue(split, conf);
    byte[] bytes = new byte[inputLine.getBytes().length];
    queue.read(bytes);
    queue.close();
    String readLine = new String(bytes);
    assertEquals("Compression/Decompression error", inputLine, readLine);
  }
Code Example #7
  /** Test compressible {@link GridmixRecord}. */
  @Test
  public void testCompressibleGridmixRecord() throws IOException {
    JobConf conf = new JobConf();
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);

    FileSystem lfs = FileSystem.getLocal(conf);
    int dataSize = 1024 * 1024 * 10; // 10 MB
    float ratio = 0.357F;

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestPossiblyCompressibleGridmixRecord");
    lfs.delete(tempDir, true);

    // define a compressible GridmixRecord
    GridmixRecord record = new GridmixRecord(dataSize, 0);
    record.setCompressibility(true, ratio); // enable compression

    conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);
    org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);

    // write the record to a file
    Path recordFile = new Path(tempDir, "record");
    OutputStream outStream =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(recordFile, conf);
    DataOutputStream out = new DataOutputStream(outStream);
    record.write(out);
    out.close();
    outStream.close();

    // open the compressed stream for reading
    Path actualRecordFile = recordFile.suffix(".gz");
    InputStream in =
        CompressionEmulationUtil.getPossiblyDecompressedInputStream(actualRecordFile, conf, 0);

    // get the compressed file size
    long compressedFileSize = lfs.listStatus(actualRecordFile)[0].getLen();

    GridmixRecord recordRead = new GridmixRecord();
    recordRead.readFields(new DataInputStream(in));

    assertEquals(
        "Record size mismatch in a compressible GridmixRecord", dataSize, recordRead.getSize());
    assertTrue(
        "Failed to generate a compressible GridmixRecord",
        recordRead.getSize() > compressedFileSize);

    // check if the record can generate data with the desired compression ratio
    float seenRatio = ((float) compressedFileSize) / dataSize;
    assertEquals(
        CompressionEmulationUtil.standardizeCompressionRatio(ratio),
        CompressionEmulationUtil.standardizeCompressionRatio(seenRatio),
        1.0D);
  }
Code Example #8
  /**
   * Test {@link CompressionEmulationUtil#getPossiblyDecompressedInputStream(Path, Configuration,
   * long)} and {@link CompressionEmulationUtil#getPossiblyCompressedOutputStream(Path,
   * Configuration)}.
   */
  @Test
  public void testPossiblyCompressedDecompressedStreams() throws IOException {
    JobConf conf = new JobConf();
    FileSystem lfs = FileSystem.getLocal(conf);
    String inputLine = "Hi Hello!";

    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
    conf.setBoolean(FileOutputFormat.COMPRESS, true);
    conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestPossiblyCompressedDecompressedStreams");
    lfs.delete(tempDir, true);

    // create a compressed file
    Path compressedFile = new Path(tempDir, "test");
    OutputStream out =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(compressedFile, conf);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
    writer.write(inputLine);
    writer.close();

    // now read back the data from the compressed stream; the gzip codec
    // appended its default ".gz" extension to the file it wrote
    compressedFile = compressedFile.suffix(".gz");
    InputStream in =
        CompressionEmulationUtil.getPossiblyDecompressedInputStream(compressedFile, conf, 0);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    String readLine = reader.readLine();
    assertEquals("Compression/Decompression error", inputLine, readLine);
    reader.close();
  }
Code Example #9
  /** Runs a GridMix data-generation job. */
  private static void runDataGenJob(Configuration conf, Path tempDir)
      throws IOException, ClassNotFoundException, InterruptedException {
    JobClient client = new JobClient(conf);

    // run the data generation with a single map task
    conf.setInt(MRJobConfig.NUM_MAPS, 1);

    Job job = Job.getInstance(conf);

    CompressionEmulationUtil.configure(job);
    job.setInputFormatClass(CustomInputFormat.class);

    // set the output path
    FileOutputFormat.setOutputPath(job, tempDir);

    // submit and wait for completion
    job.submit();
    int ret = job.waitForCompletion(true) ? 0 : 1;

    assertEquals("Job Failed", 0, ret);
  }
Code Example #10
File: Gridmix.java, Project: Jude7/bc-hadoop2.0
  /**
   * Write random bytes at the path &lt;inputDir&gt;.
   *
   * @see org.apache.hadoop.mapred.gridmix.GenerateData
   */
  protected void writeInputData(long genbytes, Path inputDir)
      throws IOException, InterruptedException {
    final Configuration conf = getConf();

    // configure the compression ratio if needed
    CompressionEmulationUtil.setupDataGeneratorConfig(conf);

    final GenerateData genData = new GenerateData(conf, inputDir, genbytes);
    LOG.info("Generating " + StringUtils.humanReadableInt(genbytes) + " of test data...");
    launchGridmixJob(genData);

    FsShell shell = new FsShell(conf);
    try {
      LOG.info("Changing the permissions for inputPath " + inputDir.toString());
      shell.run(new String[] {"-chmod", "-R", "777", inputDir.toString()});
    } catch (Exception e) {
      LOG.error("Couldnt change the file permissions ", e);
      throw new IOException(e);
    }

    LOG.info("Input data generation successful.");
  }
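An illustrative call site for the method above; the size and paths here are hypothetical, and the actual Gridmix driver derives them from its command line (the "-generate" option and the I/O path argument):

  // Hypothetical invocation: generate 1 GB of test data under <ioPath>/input.
  Path ioPath = new Path("/tmp/gridmix"); // hypothetical I/O root
  long genbytes = 1L << 30; // 1 GB
  writeInputData(genbytes, new Path(ioPath, "input"));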
Code Example #11
  /**
   * Test if {@link RandomTextDataGenerator} can generate random text data with the desired
   * compression ratio. This involves
   *   - using {@link CompressionEmulationUtil} to configure the MR job for generating random
   *     text data with the desired compression ratio,
   *   - running the MR job, and
   *   - testing {@link RandomTextDataGenerator}'s output by matching the (compressed) output
   *     size against the expected compression ratio.
   */
  private void testCompressionRatioConfigure(float ratio) throws Exception {
    long dataSize = 10 * 1024 * 1024;

    Configuration conf = new Configuration();
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);

    conf.setLong(GenerateData.GRIDMIX_GEN_BYTES, dataSize);

    float expectedRatio = CompressionEmulationUtil.DEFAULT_COMPRESSION_RATIO;
    if (ratio > 0) {
      // set the compression ratio in the conf
      CompressionEmulationUtil.setMapInputCompressionEmulationRatio(conf, ratio);
      expectedRatio = CompressionEmulationUtil.standardizeCompressionRatio(ratio);
    }

    // invoke the utility to map from ratio to word-size
    CompressionEmulationUtil.setupDataGeneratorConfig(conf);

    FileSystem lfs = FileSystem.getLocal(conf);

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestCustomRandomCompressedTextDataGenr");
    lfs.delete(tempDir, true);

    runDataGenJob(conf, tempDir);

    // validate the output data
    FileStatus[] files = lfs.listStatus(tempDir, new Utils.OutputFileUtils.OutputFilesFilter());
    long size = 0;

    for (FileStatus status : files) {
      size += status.getLen();
    }

    float compressionRatio = ((float) size) / dataSize;
    float stdRatio = CompressionEmulationUtil.standardizeCompressionRatio(compressionRatio);

    assertEquals(expectedRatio, stdRatio, 0.0D);
  }
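Callers of this parameterized helper would presumably look like the following; the method names and ratios are illustrative, not necessarily those of the original suite:

  // Illustrative tests driving the helper above.
  @Test
  public void testCompressionRatioConfigureWithCustomRatio() throws Exception {
    testCompressionRatioConfigure(0.58F); // explicitly configured ratio
  }

  @Test
  public void testCompressionRatioConfigureWithDefaultRatio() throws Exception {
    testCompressionRatioConfigure(0F); // falls back to DEFAULT_COMPRESSION_RATIO
  }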