@Override public void initialize(InputSplit spl, TaskAttemptContext ctx) throws IOException { // This method should only be called once (see Hadoop API). However, // there seems to be disagreement between implementations that call // initialize() and Hadoop-BAM's own code that relies on // {@link BAMInputFormat} to call initialize() when the reader is // created. Therefore we add this check for the time being. if (isInitialized) close(); isInitialized = true; final Configuration conf = ctx.getConfiguration(); final FileVirtualSplit split = (FileVirtualSplit) spl; final Path file = split.getPath(); final FileSystem fs = file.getFileSystem(conf); this.stringency = SAMHeaderReader.getValidationStringency(conf); final FSDataInputStream in = fs.open(file); final SAMFileHeader header = SAMHeaderReader.readSAMHeaderFrom(in, conf); codec = new BAMRecordCodec(header); in.seek(0); bci = new BlockCompressedInputStream( new WrapSeekable<FSDataInputStream>(in, fs.getFileStatus(file).getLen(), file)); virtualStart = split.getStartVirtualOffset(); fileStart = virtualStart >>> 16; virtualEnd = split.getEndVirtualOffset(); bci.seek(virtualStart); codec.setInputStream(bci); if (BAMInputFormat.DEBUG_BAM_SPLITTER) { final long recordStart = virtualStart & 0xffff; System.err.println( "XXX inizialized BAMRecordReader byte offset: " + fileStart + " record offset: " + recordStart); } keepReadPairsTogether = SortOrder.queryname.equals(header.getSortOrder()) && conf.getBoolean(BAMInputFormat.KEEP_PAIRED_READS_TOGETHER_PROPERTY, false); readPair = false; lastOfPair = false; intervals = BAMInputFormat.getIntervals(conf); if (intervals != null) { overlapDetector = new OverlapDetector<>(0, 0); overlapDetector.addAll(intervals, intervals); } }
/**
 * Builds a {@link SAMFileWriter} configured the common way for GATK tools.
 *
 * <p>Index creation is only honoured for coordinate-sorted headers; for any other sort order a
 * warning is logged and the writer is created without an index.
 *
 * @param outputFile destination file; must not be null. A {@code .cram} extension requires a
 *     reference.
 * @param referenceFile reference source to use; must not be null when {@code outputFile} has a
 *     {@code .cram} extension
 * @param header header to be used for the output writer; must not be null
 * @param preSorted if true, the records must already be sorted to match the header sort order
 * @param createOutputBamIndex if true, an index will be created for .BAM and .CRAM files
 * @param createMD5 if true, an MD5 file will be created
 * @return the configured SAMFileWriter
 */
public static SAMFileWriter createCommonSAMWriter(
    final File outputFile,
    final File referenceFile,
    final SAMFileHeader header,
    final boolean preSorted,
    boolean createOutputBamIndex,
    final boolean createMD5) {
  Utils.nonNull(outputFile);
  Utils.nonNull(header);

  // Decide whether an index can actually be written; the sort order is only
  // inspected when an index was requested in the first place.
  final boolean writeIndex;
  if (!createOutputBamIndex) {
    writeIndex = false;
  } else if (header.getSortOrder() == SAMFileHeader.SortOrder.coordinate) {
    writeIndex = true;
  } else {
    logger.warn(
        "Skipping index file creation for: "
            + outputFile.getAbsolutePath()
            + ". Index file creation requires reads in coordinate sorted order.");
    writeIndex = false;
  }

  final SAMFileWriterFactory writerFactory =
      new SAMFileWriterFactory().setCreateIndex(writeIndex).setCreateMd5File(createMD5);

  return ReadUtils.createCommonSAMWriterFromFactory(
      writerFactory, outputFile, referenceFile, header, preSorted);
}