@Override public void initialize(InputSplit spl, TaskAttemptContext ctx) throws IOException { // This method should only be called once (see Hadoop API). However, // there seems to be disagreement between implementations that call // initialize() and Hadoop-BAM's own code that relies on // {@link BAMInputFormat} to call initialize() when the reader is // created. Therefore we add this check for the time being. if (isInitialized) close(); isInitialized = true; final Configuration conf = ctx.getConfiguration(); final FileVirtualSplit split = (FileVirtualSplit) spl; final Path file = split.getPath(); final FileSystem fs = file.getFileSystem(conf); this.stringency = SAMHeaderReader.getValidationStringency(conf); final FSDataInputStream in = fs.open(file); final SAMFileHeader header = SAMHeaderReader.readSAMHeaderFrom(in, conf); codec = new BAMRecordCodec(header); in.seek(0); bci = new BlockCompressedInputStream( new WrapSeekable<FSDataInputStream>(in, fs.getFileStatus(file).getLen(), file)); virtualStart = split.getStartVirtualOffset(); fileStart = virtualStart >>> 16; virtualEnd = split.getEndVirtualOffset(); bci.seek(virtualStart); codec.setInputStream(bci); if (BAMInputFormat.DEBUG_BAM_SPLITTER) { final long recordStart = virtualStart & 0xffff; System.err.println( "XXX inizialized BAMRecordReader byte offset: " + fileStart + " record offset: " + recordStart); } keepReadPairsTogether = SortOrder.queryname.equals(header.getSortOrder()) && conf.getBoolean(BAMInputFormat.KEEP_PAIRED_READS_TOGETHER_PROPERTY, false); readPair = false; lastOfPair = false; intervals = BAMInputFormat.getIntervals(conf); if (intervals != null) { overlapDetector = new OverlapDetector<>(0, 0); overlapDetector.addAll(intervals, intervals); } }
public BAMSplitGuesser(SeekableStream ss, InputStream headerStream, Configuration conf) throws IOException { inFile = ss; buf = ByteBuffer.allocate(8); buf.order(ByteOrder.LITTLE_ENDIAN); referenceSequenceCount = SAMHeaderReader.readSAMHeaderFrom(headerStream, conf).getSequenceDictionary().size(); bamCodec = new BAMRecordCodec(null, new LazyBAMRecordFactory()); }