Java CompressionCodecFactory示例，org.apache.hadoop.io.compress.CompressionCodecFactory Java示例

示例#1

0

显示文件

文件： LzoSplitRecordReader.java 项目： jso/hadoop-lzo

  @Override
  public void initialize(InputSplit genericSplit, TaskAttemptContext taskAttemptContext)
      throws IOException {
    context = taskAttemptContext;
    FileSplit fileSplit = (FileSplit) genericSplit;
    lzoFile = fileSplit.getPath();
    // The LzoSplitInputFormat is not splittable, so the split length is the whole file.
    totalFileSize = fileSplit.getLength();

    // Jump through some hoops to create the lzo codec.
    Configuration conf = CompatibilityUtil.getConfiguration(context);
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(lzoFile);
    ((Configurable) codec).setConf(conf);

    LzopDecompressor lzopDecompressor = (LzopDecompressor) codec.createDecompressor();
    FileSystem fs = lzoFile.getFileSystem(conf);
    rawInputStream = fs.open(lzoFile);

    // Creating the LzopInputStream here just reads the lzo header for us, nothing more.
    // We do the rest of our input off of the raw stream is.
    codec.createInputStream(rawInputStream, lzopDecompressor);

    // This must be called AFTER createInputStream is called, because createInputStream
    // is what reads the header, which has the checksum information.  Otherwise getChecksumsCount
    // erroneously returns zero, and all block offsets will be wrong.
    numCompressedChecksums = lzopDecompressor.getCompressedChecksumsCount();
    numDecompressedChecksums = lzopDecompressor.getDecompressedChecksumsCount();
  }

示例#2

0

显示文件

文件： DownloaderRecordReader.java 项目： NPSVisionLab/hipi

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {

    // Obtain path to input list of input images and open input stream
    FileSplit fileSplit = (FileSplit) split;
    Path path = fileSplit.getPath();
    FileSystem fileSystem = path.getFileSystem(context.getConfiguration());
    FSDataInputStream fileIn = fileSystem.open(path);

    // Note the start and length fields in the FileSplit object are being used to
    // convey a range of lines in the input list of image URLs
    startLine = fileSplit.getStart();
    numLines = fileSplit.getLength();
    linesRead = 0; // total lines read by this particular record reader instance
    linesPerRecord = 100; // can be modified to change key/value pair size (may improve efficiency)

    // If it exists, get the relevant compression codec for the FileSplit
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(context.getConfiguration());
    CompressionCodec codec = codecFactory.getCodec(path);

    // If the codec was found, use it to create an decompressed input stream.
    // Otherwise, assume input stream is already decompressed
    if (codec != null) {
      reader = new BufferedReader(new InputStreamReader(codec.createInputStream(fileIn)));
    } else {
      reader = new BufferedReader(new InputStreamReader(fileIn));
    }
  }

示例#3

0

显示文件

文件： TestExport.java 项目： arushisrivastava/sqoop

  private void verifyCompressedFile(Path f, int expectedNumLines) throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "file:///");
    FileSystem fs = FileSystem.get(conf);
    InputStream is = fs.open(f);
    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
    CompressionCodec codec = ccf.getCodec(f);
    LOG.info("gzip check codec is " + codec);
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    if (null == decompressor) {
      LOG.info("Verifying gzip sanity with null decompressor");
    } else {
      LOG.info("Verifying gzip sanity with decompressor: " + decompressor.toString());
    }
    is = codec.createInputStream(is, decompressor);
    BufferedReader r = new BufferedReader(new InputStreamReader(is));
    int numLines = 0;
    while (true) {
      String ln = r.readLine();
      if (ln == null) {
        break;
      }
      numLines++;
    }

    r.close();
    assertEquals("Did not read back correct number of lines", expectedNumLines, numLines);
    LOG.info("gzip sanity check returned " + numLines + " lines; ok.");
  }

示例#4

0

显示文件

文件： TestExport.java 项目： arushisrivastava/sqoop

  /**
   * Create a data file that gets exported to the db.
   *
   * @param fileNum the number of the file (for multi-file export)
   * @param numRecords how many records to write to the file.
   * @param gzip is true if the file should be gzipped.
   */
  private void createTextFile(
      int fileNum, int numRecords, boolean gzip, ColumnGenerator... extraCols) throws IOException {
    int startId = fileNum * numRecords;

    String ext = ".txt";
    if (gzip) {
      ext = ext + ".gz";
    }
    Path tablePath = getTablePath();
    Path filePath = new Path(tablePath, "part" + fileNum + ext);

    Configuration conf = new Configuration();
    conf.set("fs.default.name", "file:///");
    FileSystem fs = FileSystem.get(conf);
    fs.mkdirs(tablePath);
    OutputStream os = fs.create(filePath);
    if (gzip) {
      CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
      CompressionCodec codec = ccf.getCodec(filePath);
      os = codec.createOutputStream(os);
    }
    BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
    for (int i = 0; i < numRecords; i++) {
      w.write(getRecordLine(startId + i, extraCols));
    }
    w.close();
    os.close();

    if (gzip) {
      verifyCompressedFile(filePath, numRecords);
    }
  }

示例#5

0

显示文件

文件： QseqInputFormat.java 项目： ridvandongelci/Hadoop-BAM

    public QseqRecordReader(Configuration conf, FileSplit split) throws IOException {
      setConf(conf);
      file = split.getPath();
      start = split.getStart();
      end = start + split.getLength();

      FileSystem fs = file.getFileSystem(conf);
      FSDataInputStream fileIn = fs.open(file);

      CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
      CompressionCodec codec = codecFactory.getCodec(file);

      if (codec == null) // no codec.  Uncompressed file.
      {
        positionAtFirstRecord(fileIn);
        inputStream = fileIn;
      } else { // compressed file
        if (start != 0)
          throw new RuntimeException(
              "Start position for compressed file is not 0! (found " + start + ")");

        inputStream = codec.createInputStream(fileIn);
        end = Long.MAX_VALUE; // read until the end of the file
      }

      lineReader = new LineReader(inputStream);
    }

示例#6

0

显示文件

文件： HDFSTools.java 项目： imclab/faunus

  public static void decompressFile(
      final FileSystem fs, final String inFile, final String outFile, boolean deletePrevious)
      throws IOException {
    final Path inPath = new Path(inFile);
    final Path outPath = new Path(outFile);
    final CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
    final CompressionCodec codec = factory.getCodec(inPath);
    final OutputStream out = fs.create(outPath);
    final InputStream in = codec.createInputStream(fs.open(inPath));
    IOUtils.copyBytes(in, out, 8192);
    IOUtils.closeStream(in);
    IOUtils.closeStream(out);

    if (deletePrevious) fs.delete(new Path(inFile), true);
  }

示例#7

0

显示文件

文件： HadoopFsHelper.java 项目： jinhyukchang/gobblin

 /**
  * Returns an {@link InputStream} to the specified file.
  *
  * <p>Note: It is the caller's responsibility to close the returned {@link InputStream}.
  *
  * @param path The path to the file to open.
  * @return An {@link InputStream} for the specified file.
  * @throws FileBasedHelperException if there is a problem opening the {@link InputStream} for the
  *     specified file.
  */
 @Override
 public InputStream getFileStream(String path) throws FileBasedHelperException {
   try {
     Path p = new Path(path);
     InputStream in = this.getFileSystem().open(p);
     // Account for compressed files (e.g. gzip).
     // https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala
     CompressionCodecFactory factory = new CompressionCodecFactory(this.getFileSystem().getConf());
     CompressionCodec codec = factory.getCodec(p);
     return (codec == null) ? in : codec.createInputStream(in);
   } catch (IOException e) {
     throw new FileBasedHelperException(
         "Cannot open file " + path + " due to " + e.getMessage(), e);
   }
 }

示例#8

0

显示文件

文件： MutilCharRecordReader.java 项目： sven0726/hive

  public MutilCharRecordReader(FileSplit inputSplit, Configuration job) throws IOException {

    maxLineLength = job.getInt("mapred.mutilCharRecordReader.maxlength", Integer.MAX_VALUE);
    start = inputSplit.getStart();
    end = start + inputSplit.getLength();
    final Path file = inputSplit.getPath();

    compressionCodecs = new CompressionCodecFactory(job);

    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // 打开文件系统
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(file);
    boolean skipFirstLine = false;

    if (codec != null) {
      lineReader = new LineReader(codec.createInputStream(fileIn), job);
      end = Long.MAX_VALUE;
    } else {
      if (start != 0) {
        skipFirstLine = true;
        --start;
        fileIn.seek(start);
      }
      lineReader = new LineReader(fileIn, job);
    }

    if (skipFirstLine) {
      start +=
          lineReader.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
  }

示例#9

0

显示文件

文件： XMLTagInputFormat.java 项目： lordjoe/ChemSpark

    public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
      FileSplit split = (FileSplit) genericSplit;
      Configuration job = context.getConfiguration();
      m_Sb.setLength(0);
      m_Start = split.getStart();
      m_End = m_Start + split.getLength();
      final Path file = split.getPath();
      compressionCodecs = new CompressionCodecFactory(job);
      final CompressionCodec codec = compressionCodecs.getCodec(file);

      // open the file and seek to the m_Start of the split
      FileSystem fs = file.getFileSystem(job);
      //  getFileStatus fileStatus = fs.getFileStatus(split.getPath());
      //noinspection deprecation
      @SuppressWarnings(value = "deprecated")
      long length = fs.getLength(file);
      FSDataInputStream fileIn = fs.open(split.getPath());
      if (m_Start > 0) fileIn.seek(m_Start);
      if (codec != null) {
        CompressionInputStream inputStream = codec.createInputStream(fileIn);
        m_Input = new BufferedReader(new InputStreamReader(inputStream));
        m_End = length;
      } else {
        m_Input = new BufferedReader(new InputStreamReader(fileIn));
      }
      m_Current = m_Start;
      m_Key = split.getPath().getName();
    }

示例#10

0

显示文件

文件： CompressionCodecFactory.java 项目： ifloating/hadoop-source-reading

 /**
  * A little test program.
  *
  * @param args
  */
 public static void main(String[] args) throws Exception {
   Configuration conf = new Configuration();
   CompressionCodecFactory factory = new CompressionCodecFactory(conf);
   boolean encode = false;
   for (int i = 0; i < args.length; ++i) {
     if ("-in".equals(args[i])) {
       encode = true;
     } else if ("-out".equals(args[i])) {
       encode = false;
     } else {
       CompressionCodec codec = factory.getCodec(new Path(args[i]));
       if (codec == null) {
         System.out.println("Codec for " + args[i] + " not found.");
       } else {
         if (encode) {
           CompressionOutputStream out =
               codec.createOutputStream(new java.io.FileOutputStream(args[i]));
           byte[] buffer = new byte[100];
           String inFilename = removeSuffix(args[i], codec.getDefaultExtension());
           java.io.InputStream in = new java.io.FileInputStream(inFilename);
           int len = in.read(buffer);
           while (len > 0) {
             out.write(buffer, 0, len);
             len = in.read(buffer);
           }
           in.close();
           out.close();
         } else {
           CompressionInputStream in =
               codec.createInputStream(new java.io.FileInputStream(args[i]));
           byte[] buffer = new byte[100];
           int len = in.read(buffer);
           while (len > 0) {
             System.out.write(buffer, 0, len);
             len = in.read(buffer);
           }
           in.close();
         }
       }
     }
   }
 }

示例#11

0

显示文件

文件： XMLInputFormat.java 项目： jordanbg/Cloud9

    public XMLRecordReader(FileSplit split, JobConf jobConf) throws IOException {
      if (jobConf.get(START_TAG_KEY) == null || jobConf.get(END_TAG_KEY) == null)
        throw new RuntimeException("Error! XML start and end tags unspecified!");

      startTag = jobConf.get(START_TAG_KEY).getBytes("utf-8");
      endTag = jobConf.get(END_TAG_KEY).getBytes("utf-8");

      start = split.getStart();
      Path file = split.getPath();

      CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(jobConf);
      CompressionCodec codec = compressionCodecs.getCodec(file);

      FileSystem fs = file.getFileSystem(jobConf);

      if (codec != null) {
        sLogger.info("Reading compressed file...");

        // InputStream tempStream = codec.createInputStream(fileIn);
        // fsin = new DataInputStream(tempStream);

        fsin = new DataInputStream(codec.createInputStream(fs.open(file)));

        end = Long.MAX_VALUE;
      } else {
        sLogger.info("Reading uncompressed file...");

        FSDataInputStream fileIn = fs.open(file);

        fileIn.seek(start);
        fsin = fileIn;

        end = start + split.getLength();
      }

      recordStartPos = start;

      // Because input streams of gzipped files are not seekable
      // (specifically, do not support getPos), we need to keep
      // track of bytes consumed ourselves.
      pos = start;
    }

示例#12

0

显示文件

文件： CorpusVocabNormalizerAndNumberizer.java 项目： rahulbhawsar/Cloud9

    public RecordReader<Text, Text> getRecordReader(
        InputSplit genericSplit, JobConf job, Reporter reporter) throws IOException {

      reporter.setStatus(genericSplit.toString());
      FileSplit split = (FileSplit) genericSplit;
      final Path file = split.getPath();
      FileSystem fs = file.getFileSystem(job);
      FSDataInputStream fileIn = fs.open(split.getPath());
      if (compressionCodecs != null && compressionCodecs.getCodec(file) != null)
        throw new RuntimeException("Not handling compression!");

      return new StreamXmlRecordReader(fileIn, split, reporter, job, FileSystem.get(job));
    }

示例#13

0

显示文件

文件： LzoRecordReader.java 项目： vbogdanovsky/elephant-bird

  @Override
  public void initialize(InputSplit genericSplit, TaskAttemptContext context)
      throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start_ = split.getStart();
    end_ = start_ + split.getLength();
    final Path file = split.getPath();
    Configuration job = HadoopCompat.getConfiguration(context);

    errorTracker = new InputErrorTracker(job);

    LOG.info("input split: " + file + " " + start_ + ":" + end_);

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
      throw new IOException("No codec for file " + file + " found, cannot run");
    }

    // Open the file and seek to the start of the split.
    fileIn_ = fs.open(split.getPath());

    // Creates input stream and also reads the file header.
    createInputReader(codec.createInputStream(fileIn_), job);

    if (start_ != 0) {
      fileIn_.seek(start_);
      skipToNextSyncPoint(false);
      start_ = fileIn_.getPos();
      LOG.info("Start is now " + start_);
    } else {
      skipToNextSyncPoint(true);
    }
    pos_ = start_;
  }

示例#14

0

显示文件

文件： CountryIncomeConf.java 项目： Rachitahuja20/Hadoop-learning

  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {

    Path inputPath = new Path(args[0]);
    Path outputDir = new Path(args[1]);

    // Create configuration
    Configuration conf = new Configuration(true);

    // Create job
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "CountryIncomeConf");
    job.setJarByClass(CountryIncomeConf.class);

    // Decompressing .gz file Ex. foo.csv.gz to foo.csv

    String uri = args[0];
    FileSystem fs = FileSystem.get(URI.create(uri), conf);

    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(inputPath);
    if (codec == null) {
      System.err.println("No codec found for " + uri);
      System.exit(1);
    }
    String outputUri = CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());
    InputStream in = null;
    OutputStream out = null;
    try {
      in = codec.createInputStream(fs.open(inputPath));
      out = fs.create(new Path(outputUri));
      IOUtils.copyBytes(in, out, conf);
    } finally {
      IOUtils.closeStream(in);
      IOUtils.closeStream(out);
    }

    // Setup MapReduce
    job.setMapperClass(CountryIncomeMapper.class);
    job.setReducerClass(CountryIncomeReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Input
    // FileInputFormat.addInputPath(job, inputPath);
    FileInputFormat.addInputPaths(job, outputUri);
    job.setInputFormatClass(TextInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    FileSystem hdfs = FileSystem.get(conf);
    if (hdfs.exists(outputDir)) hdfs.delete(outputDir, true);

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;

    // Counter finding and displaying
    Counters counters = job.getCounters();

    // Displaying counters
    System.out.printf(
        "Missing Fields: %d, Error Count: %d\n",
        counters.findCounter(COUNTERS.MISSING_FIELDS_RECORD_COUNT).getValue(),
        counters.findCounter(COUNTERS.NULL_OR_EMPTY).getValue());

    System.exit(code);
  }

示例#15

0

显示文件

文件： Benchmark.java 项目： rajeshbalamohan/ifile_v2

  public void runBenchmark() throws IOException {
    System.out.println();
    System.out.println();
    System.out.println();
    System.out.println();

    /*
     * org.apache.hadoop.io.compress.BZip2Codec,
     * org.apache.hadoop.io.compress.DefaultCodec
     * org.apache.hadoop.io.compress.DeflateCodec
     * org.apache.hadoop.io.compress.GzipCodec
     * org.apache.hadoop.io.compress.Lz4Codec
     * org.apache.hadoop.io.compress.SnappyCodec
     */
    Configuration conf = new Configuration();
    List<Class<? extends CompressionCodec>> codecList = codecFactory.getCodecClasses(conf);
    Set<Class<? extends CompressionCodec>> codecSet =
        new HashSet<Class<? extends CompressionCodec>>();
    codecSet.add(null); // no compression case
    codecSet.addAll(codecList);

    EnumSet<KV_TRAIT> traitSet = EnumSet.of(KV_TRAIT.KV, KV_TRAIT.MULTI_KV);
    for (KV_TRAIT trait : traitSet) {
      boolean rle = false;
      for (Class<? extends CompressionCodec> codec : codecSet) {
        try {
          String fileName =
              "result_"
                  + trait
                  + "_"
                  + ((codec == null) ? "no_compression" : codec.getSimpleName())
                  + "_no_rle.out";
          CompressionCodec compCodec =
              (codec == null) ? null : codecFactory.getCodecByClassName(codec.getName());
          Path file = new Path(".", fileName);
          WriterOptions writeOptions = new WriterOptions();

          conf.set("ifile.trait", trait.toString());
          writeOptions.setConf(conf).setFilePath(fs, file).setCodec(compCodec).setRLE(false);
          createIFile(writeOptions, trait);
          Result rs = new Result(fileName, writeOptions, fs.getFileStatus(file).getLen());
          System.out.println(rs);

          // with RLE
          fileName =
              "result_"
                  + trait
                  + "_"
                  + ((codec == null) ? "no_compression" : codec.getSimpleName())
                  + "_rle.out";
          writeOptions.setRLE(true);
          createIFile(writeOptions, trait);
          rs = new Result(fileName, writeOptions, fs.getFileStatus(file).getLen());
          System.out.println(rs);
        } catch (Throwable t) {
          t.printStackTrace();
          // proceed to the next benchmark.  Quite possible that codec jars aren't available
        }
      }
    }
  }

示例#16

0

显示文件

文件： TestCodecFactory.java 项目： Jude7/bc-hadoop2.0

  public static void testFinding() {
    CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
    CompressionCodec codec = factory.getCodec(new Path("/tmp/foo.bar"));
    assertEquals("default factory foo codec", null, codec);
    codec = factory.getCodecByClassName(BarCodec.class.getCanonicalName());
    assertEquals("default factory foo codec", null, codec);

    codec = factory.getCodec(new Path("/tmp/foo.gz"));
    checkCodec("default factory for .gz", GzipCodec.class, codec);
    codec = factory.getCodecByClassName(GzipCodec.class.getCanonicalName());
    checkCodec("default factory for gzip codec", GzipCodec.class, codec);
    codec = factory.getCodecByName("gzip");
    checkCodec("default factory for gzip codec", GzipCodec.class, codec);
    codec = factory.getCodecByName("GZIP");
    checkCodec("default factory for gzip codec", GzipCodec.class, codec);
    codec = factory.getCodecByName("GZIPCodec");
    checkCodec("default factory for gzip codec", GzipCodec.class, codec);
    codec = factory.getCodecByName("gzipcodec");
    checkCodec("default factory for gzip codec", GzipCodec.class, codec);
    Class klass = factory.getCodecClassByName("gzipcodec");
    assertEquals(GzipCodec.class, klass);

    codec = factory.getCodec(new Path("/tmp/foo.bz2"));
    checkCodec("default factory for .bz2", BZip2Codec.class, codec);
    codec = factory.getCodecByClassName(BZip2Codec.class.getCanonicalName());
    checkCodec("default factory for bzip2 codec", BZip2Codec.class, codec);
    codec = factory.getCodecByName("bzip2");
    checkCodec("default factory for bzip2 codec", BZip2Codec.class, codec);
    codec = factory.getCodecByName("bzip2codec");
    checkCodec("default factory for bzip2 codec", BZip2Codec.class, codec);
    codec = factory.getCodecByName("BZIP2");
    checkCodec("default factory for bzip2 codec", BZip2Codec.class, codec);
    codec = factory.getCodecByName("BZIP2CODEC");
    checkCodec("default factory for bzip2 codec", BZip2Codec.class, codec);

    codec = factory.getCodecByClassName(DeflateCodec.class.getCanonicalName());
    checkCodec("default factory for deflate codec", DeflateCodec.class, codec);
    codec = factory.getCodecByName("deflate");
    checkCodec("default factory for deflate codec", DeflateCodec.class, codec);
    codec = factory.getCodecByName("deflatecodec");
    checkCodec("default factory for deflate codec", DeflateCodec.class, codec);
    codec = factory.getCodecByName("DEFLATE");
    checkCodec("default factory for deflate codec", DeflateCodec.class, codec);
    codec = factory.getCodecByName("DEFLATECODEC");
    checkCodec("default factory for deflate codec", DeflateCodec.class, codec);

    factory = setClasses(new Class[0]);
    // gz, bz2, snappy, lz4 are picked up by service loader, but bar isn't
    codec = factory.getCodec(new Path("/tmp/foo.bar"));
    assertEquals("empty factory bar codec", null, codec);
    codec = factory.getCodecByClassName(BarCodec.class.getCanonicalName());
    assertEquals("empty factory bar codec", null, codec);

    codec = factory.getCodec(new Path("/tmp/foo.gz"));
    checkCodec("empty factory gz codec", GzipCodec.class, codec);
    codec = factory.getCodecByClassName(GzipCodec.class.getCanonicalName());
    checkCodec("empty factory gz codec", GzipCodec.class, codec);

    codec = factory.getCodec(new Path("/tmp/foo.bz2"));
    checkCodec("empty factory for .bz2", BZip2Codec.class, codec);
    codec = factory.getCodecByClassName(BZip2Codec.class.getCanonicalName());
    checkCodec("empty factory for bzip2 codec", BZip2Codec.class, codec);

    codec = factory.getCodec(new Path("/tmp/foo.snappy"));
    checkCodec("empty factory snappy codec", SnappyCodec.class, codec);
    codec = factory.getCodecByClassName(SnappyCodec.class.getCanonicalName());
    checkCodec("empty factory snappy codec", SnappyCodec.class, codec);

    codec = factory.getCodec(new Path("/tmp/foo.lz4"));
    checkCodec("empty factory lz4 codec", Lz4Codec.class, codec);
    codec = factory.getCodecByClassName(Lz4Codec.class.getCanonicalName());
    checkCodec("empty factory lz4 codec", Lz4Codec.class, codec);

    factory = setClasses(new Class[] {BarCodec.class, FooCodec.class, FooBarCodec.class});
    codec = factory.getCodec(new Path("/tmp/.foo.bar.gz"));
    checkCodec("full factory gz codec", GzipCodec.class, codec);
    codec = factory.getCodecByClassName(GzipCodec.class.getCanonicalName());
    checkCodec("full codec gz codec", GzipCodec.class, codec);

    codec = factory.getCodec(new Path("/tmp/foo.bz2"));
    checkCodec("full factory for .bz2", BZip2Codec.class, codec);
    codec = factory.getCodecByClassName(BZip2Codec.class.getCanonicalName());
    checkCodec("full codec bzip2 codec", BZip2Codec.class, codec);

    codec = factory.getCodec(new Path("/tmp/foo.bar"));
    checkCodec("full factory bar codec", BarCodec.class, codec);
    codec = factory.getCodecByClassName(BarCodec.class.getCanonicalName());
    checkCodec("full factory bar codec", BarCodec.class, codec);
    codec = factory.getCodecByName("bar");
    checkCodec("full factory bar codec", BarCodec.class, codec);
    codec = factory.getCodecByName("BAR");
    checkCodec("full factory bar codec", BarCodec.class, codec);

    codec = factory.getCodec(new Path("/tmp/foo/baz.foo.bar"));
    checkCodec("full factory foo bar codec", FooBarCodec.class, codec);
    codec = factory.getCodecByClassName(FooBarCodec.class.getCanonicalName());
    checkCodec("full factory foo bar codec", FooBarCodec.class, codec);
    codec = factory.getCodecByName("foobar");
    checkCodec("full factory foo bar codec", FooBarCodec.class, codec);
    codec = factory.getCodecByName("FOOBAR");
    checkCodec("full factory foo bar codec", FooBarCodec.class, codec);

    codec = factory.getCodec(new Path("/tmp/foo.foo"));
    checkCodec("full factory foo codec", FooCodec.class, codec);
    codec = factory.getCodecByClassName(FooCodec.class.getCanonicalName());
    checkCodec("full factory foo codec", FooCodec.class, codec);
    codec = factory.getCodecByName("foo");
    checkCodec("full factory foo codec", FooCodec.class, codec);
    codec = factory.getCodecByName("FOO");
    checkCodec("full factory foo codec", FooCodec.class, codec);

    factory = setClasses(new Class[] {NewGzipCodec.class});
    codec = factory.getCodec(new Path("/tmp/foo.gz"));
    checkCodec("overridden factory for .gz", NewGzipCodec.class, codec);
    codec = factory.getCodecByClassName(NewGzipCodec.class.getCanonicalName());
    checkCodec("overridden factory for gzip codec", NewGzipCodec.class, codec);
  }

示例#17

0

显示文件

文件： CorpusVocabNormalizerAndNumberizer.java 项目： rahulbhawsar/Cloud9

 protected boolean isSplitable(FileSystem fs, Path file) {
   if (compressionCodecs == null) return true;
   return compressionCodecs.getCodec(file) == null;
 }

示例#18

0

显示文件

文件： TestCodecFactory.java 项目： Jude7/bc-hadoop2.0

 /**
  * Returns a factory for a given set of codecs
  *
  * @param classes the codec classes to include
  * @return a new factory
  */
 private static CompressionCodecFactory setClasses(Class[] classes) {
   Configuration conf = new Configuration();
   CompressionCodecFactory.setCodecClasses(conf, Arrays.asList(classes));
   return new CompressionCodecFactory(conf);
 }