/**
 * Exercises {@code LzoIndex}: emptiness of a default-constructed index, then
 * position lookup and slice alignment on an index holding the four offsets
 * 0, 5, 10 and 15.
 */
@Test
public void testLzoIndex() {
  // A default-constructed index reports itself as empty.
  LzoIndex emptyIndex = new LzoIndex();
  assertTrue(emptyIndex.isEmpty());

  // Fill a four-slot index with offsets 0, 5, 10, 15.
  LzoIndex index = new LzoIndex(4);
  for (int slot = 0; slot < 4; slot++) {
    index.set(slot, slot * 5);
  }
  assertFalse(index.isEmpty());

  // findNextPosition: the assertions expect the first indexed offset at or
  // after the argument, and -1 once the argument is past the last offset.
  assertEquals(0, index.findNextPosition(-1));
  assertEquals(5, index.findNextPosition(1));
  assertEquals(5, index.findNextPosition(5));
  assertEquals(15, index.findNextPosition(11));
  assertEquals(15, index.findNextPosition(15));
  assertEquals(-1, index.findNextPosition(16));

  // alignSliceStartToIndex: a start is moved forward to an indexed offset.
  assertEquals(5, index.alignSliceStartToIndex(3, 20));
  assertEquals(15, index.alignSliceStartToIndex(15, 20));

  // alignSliceEndToIndex: an end is moved forward to an indexed offset, or to
  // the supplied second argument (30 here) when it lies past the last offset.
  assertEquals(10, index.alignSliceEndToIndex(8, 30));
  assertEquals(10, index.alignSliceEndToIndex(10, 30));
  assertEquals(30, index.alignSliceEndToIndex(17, 30));

  // A start beyond every indexed offset yields NOT_FOUND.
  assertEquals(LzoIndex.NOT_FOUND, index.alignSliceStartToIndex(16, 20));
}
@Override public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException { FileSplit[] splits = (FileSplit[]) super.getSplits(conf, numSplits); // Find new starts/ends of the filesplit that align with the LZO blocks. List<FileSplit> result = new ArrayList<FileSplit>(); for (FileSplit fileSplit : splits) { Path file = fileSplit.getPath(); FileSystem fs = file.getFileSystem(conf); LzoIndex index = indexes.get(file); if (index == null) { throw new IOException("Index not found for " + file); } if (index.isEmpty()) { // Empty index, keep it as is. result.add(fileSplit); continue; } long start = fileSplit.getStart(); long end = start + fileSplit.getLength(); long lzoStart = index.alignSliceStartToIndex(start, end); long lzoEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen()); if (lzoStart != LzoIndex.NOT_FOUND && lzoEnd != LzoIndex.NOT_FOUND) { result.add(new FileSplit(file, lzoStart, lzoEnd - lzoStart, fileSplit.getLocations())); } } return result.toArray(new FileSplit[result.size()]); }