Example #1
0
    /**
     * Prepares this reader for one file split: records the split boundaries, opens the
     * underlying file (positioned at the split start) and wraps it in a buffered reader,
     * going through a decompressing stream when a codec is registered for the file name.
     *
     * @param genericSplit the split to read; it is cast to {@code FileSplit}
     * @param context task context supplying the job {@code Configuration}
     * @throws IOException if the file cannot be opened, positioned or inspected
     */
    public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
      FileSplit split = (FileSplit) genericSplit;
      Configuration job = context.getConfiguration();
      m_Sb.setLength(0);
      m_Start = split.getStart();
      m_End = m_Start + split.getLength();
      final Path file = split.getPath();
      compressionCodecs = new CompressionCodecFactory(job);
      final CompressionCodec codec = compressionCodecs.getCodec(file);

      // open the file and seek to the m_Start of the split
      FileSystem fs = file.getFileSystem(job);
      FSDataInputStream fileIn = fs.open(file);
      boolean opened = false;
      try {
        if (m_Start > 0) {
          fileIn.seek(m_Start);
        }
        // NOTE(review): both readers below decode with the platform default charset.
        if (codec != null) {
          // For a compressed file the end marker is the length of the whole file rather
          // than the end of the split. getFileStatus().getLen() replaces the deprecated
          // FileSystem.getLength(Path) and is only queried when it is actually needed.
          long length = fs.getFileStatus(file).getLen();
          CompressionInputStream inputStream = codec.createInputStream(fileIn);
          m_Input = new BufferedReader(new InputStreamReader(inputStream));
          m_End = length;
        } else {
          m_Input = new BufferedReader(new InputStreamReader(fileIn));
        }
        opened = true;
      } finally {
        // Do not leak the file handle when positioning or wrapping the stream fails.
        if (!opened) {
          fileIn.close();
        }
      }
      m_Current = m_Start;
      m_Key = file.getName();
    }
 /**
  * A little test program that compresses or decompresses the files named on the
  * command line, choosing the codec from each file name.
  *
  * <p>Arguments are handled left to right. {@code -in} switches to compress mode and
  * {@code -out} switches back to decompress mode, which is also the initial mode. Any
  * other argument is a file name:
  * <ul>
  *   <li>compress mode: the file named by stripping the codec's default extension from
  *       the argument is read and its compressed form is written to the argument;</li>
  *   <li>decompress mode: the argument is read and its decompressed content is written
  *       to standard output.</li>
  * </ul>
  * A file name for which no codec is found is reported on standard output and skipped.
  *
  * @param args mode flags and file names, as described above
  * @throws Exception if a file cannot be opened, read or written
  */
 public static void main(String[] args) throws Exception {
   Configuration conf = new Configuration();
   CompressionCodecFactory factory = new CompressionCodecFactory(conf);
   boolean encode = false;
   for (int i = 0; i < args.length; ++i) {
     if ("-in".equals(args[i])) {
       encode = true;
     } else if ("-out".equals(args[i])) {
       encode = false;
     } else {
       CompressionCodec codec = factory.getCodec(new Path(args[i]));
       if (codec == null) {
         System.out.println("Codec for " + args[i] + " not found.");
       } else {
         byte[] buffer = new byte[100];
         if (encode) {
           String inFilename = removeSuffix(args[i], codec.getDefaultExtension());
           // Open the input first so that a missing input file does not leave behind
           // a freshly created (or truncated) output file.
           java.io.InputStream in = new java.io.FileInputStream(inFilename);
           try {
             java.io.OutputStream rawOut = new java.io.FileOutputStream(args[i]);
             try {
               CompressionOutputStream out = codec.createOutputStream(rawOut);
               int len = in.read(buffer);
               while (len > 0) {
                 out.write(buffer, 0, len);
                 len = in.read(buffer);
               }
               out.close();
             } finally {
               // Releases the file handle on failure; has no effect if already closed.
               rawOut.close();
             }
           } finally {
             in.close();
           }
         } else {
           java.io.InputStream rawIn = new java.io.FileInputStream(args[i]);
           try {
             CompressionInputStream in = codec.createInputStream(rawIn);
             int len = in.read(buffer);
             while (len > 0) {
               System.out.write(buffer, 0, len);
               len = in.read(buffer);
             }
             in.close();
             System.out.flush();
           } finally {
             // Releases the file handle on failure; has no effect if already closed.
             rawIn.close();
           }
         }
       }
     }
   }
 }
Example #3
0
  /**
   * Checks which codec a {@code CompressionCodecFactory} resolves for a file path, for a
   * codec class name and for a codec alias. The same kinds of lookup are repeated against
   * four factories in turn: one built from a plain {@code Configuration}, one built from
   * an empty codec list, one built from three custom codecs, and one built from
   * {@code NewGzipCodec} alone.
   */
  public static void testFinding() {
    final String[] gzipAliases = {"gzip", "GZIP", "GZIPCodec", "gzipcodec"};
    final String[] bzip2Aliases = {"bzip2", "bzip2codec", "BZIP2", "BZIP2CODEC"};
    final String[] deflateAliases = {"deflate", "deflatecodec", "DEFLATE", "DEFLATECODEC"};

    // ---- factory built from a plain Configuration ----
    CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());

    assertEquals("default factory foo codec", null, factory.getCodec(new Path("/tmp/foo.bar")));
    assertEquals(
        "default factory foo codec",
        null,
        factory.getCodecByClassName(BarCodec.class.getCanonicalName()));

    checkCodec("default factory for .gz", GzipCodec.class, factory.getCodec(new Path("/tmp/foo.gz")));
    checkCodec(
        "default factory for gzip codec",
        GzipCodec.class,
        factory.getCodecByClassName(GzipCodec.class.getCanonicalName()));
    for (String alias : gzipAliases) {
      checkCodec("default factory for gzip codec", GzipCodec.class, factory.getCodecByName(alias));
    }
    Class<?> gzipClass = factory.getCodecClassByName("gzipcodec");
    assertEquals(GzipCodec.class, gzipClass);

    checkCodec(
        "default factory for .bz2", BZip2Codec.class, factory.getCodec(new Path("/tmp/foo.bz2")));
    checkCodec(
        "default factory for bzip2 codec",
        BZip2Codec.class,
        factory.getCodecByClassName(BZip2Codec.class.getCanonicalName()));
    for (String alias : bzip2Aliases) {
      checkCodec("default factory for bzip2 codec", BZip2Codec.class, factory.getCodecByName(alias));
    }

    checkCodec(
        "default factory for deflate codec",
        DeflateCodec.class,
        factory.getCodecByClassName(DeflateCodec.class.getCanonicalName()));
    for (String alias : deflateAliases) {
      checkCodec(
          "default factory for deflate codec", DeflateCodec.class, factory.getCodecByName(alias));
    }

    // ---- factory built from an empty codec list ----
    factory = setClasses(new Class[0]);
    // gz, bz2, snappy, lz4 are picked up by service loader, but bar isn't
    CompressionCodec resolved = factory.getCodec(new Path("/tmp/foo.bar"));
    assertEquals("empty factory bar codec", null, resolved);
    resolved = factory.getCodecByClassName(BarCodec.class.getCanonicalName());
    assertEquals("empty factory bar codec", null, resolved);

    resolved = factory.getCodec(new Path("/tmp/foo.gz"));
    checkCodec("empty factory gz codec", GzipCodec.class, resolved);
    resolved = factory.getCodecByClassName(GzipCodec.class.getCanonicalName());
    checkCodec("empty factory gz codec", GzipCodec.class, resolved);

    resolved = factory.getCodec(new Path("/tmp/foo.bz2"));
    checkCodec("empty factory for .bz2", BZip2Codec.class, resolved);
    resolved = factory.getCodecByClassName(BZip2Codec.class.getCanonicalName());
    checkCodec("empty factory for bzip2 codec", BZip2Codec.class, resolved);

    resolved = factory.getCodec(new Path("/tmp/foo.snappy"));
    checkCodec("empty factory snappy codec", SnappyCodec.class, resolved);
    resolved = factory.getCodecByClassName(SnappyCodec.class.getCanonicalName());
    checkCodec("empty factory snappy codec", SnappyCodec.class, resolved);

    resolved = factory.getCodec(new Path("/tmp/foo.lz4"));
    checkCodec("empty factory lz4 codec", Lz4Codec.class, resolved);
    resolved = factory.getCodecByClassName(Lz4Codec.class.getCanonicalName());
    checkCodec("empty factory lz4 codec", Lz4Codec.class, resolved);

    // ---- factory built from the three custom codecs ----
    factory = setClasses(new Class[] {BarCodec.class, FooCodec.class, FooBarCodec.class});

    checkCodec(
        "full factory gz codec", GzipCodec.class, factory.getCodec(new Path("/tmp/.foo.bar.gz")));
    checkCodec(
        "full codec gz codec",
        GzipCodec.class,
        factory.getCodecByClassName(GzipCodec.class.getCanonicalName()));

    checkCodec(
        "full factory for .bz2", BZip2Codec.class, factory.getCodec(new Path("/tmp/foo.bz2")));
    checkCodec(
        "full codec bzip2 codec",
        BZip2Codec.class,
        factory.getCodecByClassName(BZip2Codec.class.getCanonicalName()));

    checkCodec("full factory bar codec", BarCodec.class, factory.getCodec(new Path("/tmp/foo.bar")));
    checkCodec(
        "full factory bar codec",
        BarCodec.class,
        factory.getCodecByClassName(BarCodec.class.getCanonicalName()));
    for (String alias : new String[] {"bar", "BAR"}) {
      checkCodec("full factory bar codec", BarCodec.class, factory.getCodecByName(alias));
    }

    checkCodec(
        "full factory foo bar codec",
        FooBarCodec.class,
        factory.getCodec(new Path("/tmp/foo/baz.foo.bar")));
    checkCodec(
        "full factory foo bar codec",
        FooBarCodec.class,
        factory.getCodecByClassName(FooBarCodec.class.getCanonicalName()));
    for (String alias : new String[] {"foobar", "FOOBAR"}) {
      checkCodec("full factory foo bar codec", FooBarCodec.class, factory.getCodecByName(alias));
    }

    checkCodec("full factory foo codec", FooCodec.class, factory.getCodec(new Path("/tmp/foo.foo")));
    checkCodec(
        "full factory foo codec",
        FooCodec.class,
        factory.getCodecByClassName(FooCodec.class.getCanonicalName()));
    for (String alias : new String[] {"foo", "FOO"}) {
      checkCodec("full factory foo codec", FooCodec.class, factory.getCodecByName(alias));
    }

    // ---- factory built from NewGzipCodec only ----
    factory = setClasses(new Class[] {NewGzipCodec.class});
    checkCodec(
        "overridden factory for .gz", NewGzipCodec.class, factory.getCodec(new Path("/tmp/foo.gz")));
    checkCodec(
        "overridden factory for gzip codec",
        NewGzipCodec.class,
        factory.getCodecByClassName(NewGzipCodec.class.getCanonicalName()));
  }
Example #4
0
 /**
  * Builds a codec factory from a fresh {@code Configuration} in which the given codec
  * classes have been registered through
  * {@code CompressionCodecFactory.setCodecClasses}.
  *
  * @param classes the codec classes to register; may be an empty array
  * @return a new factory created from that configuration
  */
 private static CompressionCodecFactory setClasses(Class[] classes) {
   final Configuration codecConf = new Configuration();
   CompressionCodecFactory.setCodecClasses(codecConf, Arrays.asList(classes));
   final CompressionCodecFactory codecFactory = new CompressionCodecFactory(codecConf);
   return codecFactory;
 }