Example #1
0
 @Override
 protected void cleanup(Context context) throws IOException, InterruptedException {
   val.setSeed(r.nextLong());
   while (factory.next(null, val)) {
     context.write(NullWritable.get(), val);
     val.setSeed(r.nextLong());
   }
 }
  /** Test compressible {@link GridmixRecord}. */
  @Test
  public void testCompressibleGridmixRecord() throws IOException {
    JobConf conf = new JobConf();
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);

    FileSystem lfs = FileSystem.getLocal(conf);
    int dataSize = 1024 * 1024 * 10; // 10 MB
    float ratio = 0.357F;

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestPossiblyCompressibleGridmixRecord");
    lfs.delete(tempDir, true);

    // define a compressible GridmixRecord
    GridmixRecord record = new GridmixRecord(dataSize, 0);
    record.setCompressibility(true, ratio); // enable compression

    conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);
    org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);

    // write the record to a file
    Path recordFile = new Path(tempDir, "record");
    OutputStream outStream =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(recordFile, conf);
    DataOutputStream out = new DataOutputStream(outStream);
    record.write(out);
    out.close();
    outStream.close();

    // open the compressed stream for reading
    Path actualRecordFile = recordFile.suffix(".gz");
    InputStream in =
        CompressionEmulationUtil.getPossiblyDecompressedInputStream(actualRecordFile, conf, 0);

    // get the compressed file size
    long compressedFileSize = lfs.listStatus(actualRecordFile)[0].getLen();

    GridmixRecord recordRead = new GridmixRecord();
    recordRead.readFields(new DataInputStream(in));

    assertEquals(
        "Record size mismatch in a compressible GridmixRecord", dataSize, recordRead.getSize());
    assertTrue(
        "Failed to generate a compressible GridmixRecord",
        recordRead.getSize() > compressedFileSize);

    // check if the record can generate data with the desired compression ratio
    float seenRatio = ((float) compressedFileSize) / dataSize;
    assertEquals(
        CompressionEmulationUtil.standardizeCompressionRatio(ratio),
        CompressionEmulationUtil.standardizeCompressionRatio(seenRatio),
        1.0D);
  }
Example #3
0
 @Override
 public void map(NullWritable ignored, GridmixRecord rec, Context context)
     throws IOException, InterruptedException {
   acc += ratio;
   while (acc >= 1.0 && !reduces.isEmpty()) {
     key.setSeed(r.nextLong());
     val.setSeed(r.nextLong());
     final int idx = r.nextInt(reduces.size());
     final RecordFactory f = reduces.get(idx);
     if (!f.next(key, val)) {
       reduces.remove(idx);
       continue;
     }
     context.write(key, val);
     acc -= 1.0;
   }
 }
Example #4
0
 @Override
 public boolean nextKeyValue() throws IOException {
   val.setSeed(r.nextLong());
   return factory.next(null, val);
 }