Java LzoIndex示例

编程语言: Java

命名空间/包名称: com.hadoop.compression.lzo

类/类型: LzoIndex

hotexamples.com的示例: 5

Java LzoIndex - 已找到5个示例。这些是从开源项目中提取的最受好评的com.hadoop.compression.lzo.LzoIndex现实Java示例。您可以评价示例，以帮助我们提高示例质量。

常用方法

显示隐藏

isEmpty(3)

alignSliceEndToIndex(2)

alignSliceStartToIndex(2)

createIndex(1)

findNextPosition(1)

readIndex(1)

set(1)

示例#1

显示文件

文件： DeprecatedLzoTextInputFormat.java 项目： ronakvorabr/lzo-java

  @Override
  public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    FileSplit[] splits = (FileSplit[]) super.getSplits(conf, numSplits);
    // Find new starts/ends of the filesplit that align with the LZO blocks.

    List<FileSplit> result = new ArrayList<FileSplit>();

    for (FileSplit fileSplit : splits) {
      Path file = fileSplit.getPath();
      FileSystem fs = file.getFileSystem(conf);
      LzoIndex index = indexes.get(file);
      if (index == null) {
        throw new IOException("Index not found for " + file);
      }
      if (index.isEmpty()) {
        // Empty index, keep it as is.
        result.add(fileSplit);
        continue;
      }

      long start = fileSplit.getStart();
      long end = start + fileSplit.getLength();

      long lzoStart = index.alignSliceStartToIndex(start, end);
      long lzoEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

      if (lzoStart != LzoIndex.NOT_FOUND && lzoEnd != LzoIndex.NOT_FOUND) {
        result.add(new FileSplit(file, lzoStart, lzoEnd - lzoStart, fileSplit.getLocations()));
      }
    }

    return result.toArray(new FileSplit[result.size()]);
  }

示例#2

显示文件

文件： TestLzoTextInputFormat.java 项目： huagetai/elephant-bird

  /**
   * Generate random data, compress it, index and md5 hash the data. Then read it all back and md5
   * that too, to verify that it all went ok.
   *
   * @param testWithIndex Should we index or not?
   * @param charsToOutput How many characters of random data should we output.
   * @throws IOException
   * @throws NoSuchAlgorithmException
   * @throws InterruptedException
   */
  private void runTest(boolean testWithIndex, int charsToOutput)
      throws IOException, NoSuchAlgorithmException, InterruptedException {

    Configuration conf = new Configuration();
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    // reducing block size to force a split of the tiny file
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    Assume.assumeTrue(CoreTestUtil.okToRunLzoTests(conf));

    FileSystem.getLocal(conf).close(); // remove cached filesystem (if any)
    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir_, true);
    localFs.mkdirs(outputDir_);

    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir_);

    TaskAttemptContext attemptContext =
        new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID("123", 0, false, 1, 2));

    // create some input data
    byte[] expectedMd5 = createTestInput(outputDir_, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
      Path lzoFile = new Path(outputDir_, lzoFileName_);
      LzoIndex.createIndex(localFs, lzoFile);
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir_);

    List<InputSplit> is = inputFormat.getSplits(job);
    // verify we have the right number of lzo chunks
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
      assertEquals(3, is.size());
    } else {
      assertEquals(1, is.size());
    }

    // let's read it all and calculate the md5 hash
    for (InputSplit inputSplit : is) {
      RecordReader<LongWritable, Text> rr =
          inputFormat.createRecordReader(inputSplit, attemptContext);
      rr.initialize(inputSplit, attemptContext);

      while (rr.nextKeyValue()) {
        Text value = rr.getCurrentValue();

        md5_.update(value.getBytes(), 0, value.getLength());
      }

      rr.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5_.digest()));
  }

示例#3

显示文件

文件： DeprecatedLzoTextInputFormat.java 项目： ronakvorabr/lzo-java

  @Override
  protected FileStatus[] listStatus(JobConf conf) throws IOException {
    List<FileStatus> files = new ArrayList<FileStatus>(Arrays.asList(super.listStatus(conf)));

    String fileExtension = new LzopCodec().getDefaultExtension();

    Iterator<FileStatus> it = files.iterator();
    while (it.hasNext()) {
      FileStatus fileStatus = it.next();
      Path file = fileStatus.getPath();

      if (!file.toString().endsWith(fileExtension)) {
        // Get rid of non-LZO files.
        it.remove();
      } else {
        FileSystem fs = file.getFileSystem(conf);
        LzoIndex index = LzoIndex.readIndex(fs, file);
        indexes.put(file, index);
      }
    }

    return files.toArray(new FileStatus[] {});
  }

示例#4

显示文件

文件： TestLzoTextInputFormat.java 项目： huagetai/elephant-bird

  /** Make sure the lzo index class works as described. */
  @Test
  public void testLzoIndex() {
    LzoIndex index = new LzoIndex();
    assertTrue(index.isEmpty());
    index = new LzoIndex(4);
    index.set(0, 0);
    index.set(1, 5);
    index.set(2, 10);
    index.set(3, 15);
    assertFalse(index.isEmpty());

    assertEquals(0, index.findNextPosition(-1));
    assertEquals(5, index.findNextPosition(1));
    assertEquals(5, index.findNextPosition(5));
    assertEquals(15, index.findNextPosition(11));
    assertEquals(15, index.findNextPosition(15));
    assertEquals(-1, index.findNextPosition(16));

    assertEquals(5, index.alignSliceStartToIndex(3, 20));
    assertEquals(15, index.alignSliceStartToIndex(15, 20));
    assertEquals(10, index.alignSliceEndToIndex(8, 30));
    assertEquals(10, index.alignSliceEndToIndex(10, 30));
    assertEquals(30, index.alignSliceEndToIndex(17, 30));
    assertEquals(LzoIndex.NOT_FOUND, index.alignSliceStartToIndex(16, 20));
  }

示例#5

显示文件

文件： DeprecatedLzoTextInputFormat.java 项目： ronakvorabr/lzo-java

 @Override
 protected boolean isSplitable(FileSystem fs, Path filename) {
   LzoIndex index = indexes.get(filename);
   return !index.isEmpty();
 }