예제 #1
0
  @Override
  public void initialize(InputSplit spl, TaskAttemptContext ctx) throws IOException {
    // This method should only be called once (see Hadoop API). However,
    // there seems to be disagreement between implementations that call
    // initialize() and Hadoop-BAM's own code that relies on
    // {@link BAMInputFormat} to call initialize() when the reader is
    // created. Therefore we add this check for the time being.
    if (isInitialized) close();
    isInitialized = true;

    final Configuration conf = ctx.getConfiguration();

    final FileVirtualSplit split = (FileVirtualSplit) spl;
    final Path file = split.getPath();
    final FileSystem fs = file.getFileSystem(conf);

    this.stringency = SAMHeaderReader.getValidationStringency(conf);

    final FSDataInputStream in = fs.open(file);

    final SAMFileHeader header = SAMHeaderReader.readSAMHeaderFrom(in, conf);
    codec = new BAMRecordCodec(header);

    in.seek(0);
    bci =
        new BlockCompressedInputStream(
            new WrapSeekable<FSDataInputStream>(in, fs.getFileStatus(file).getLen(), file));

    virtualStart = split.getStartVirtualOffset();

    fileStart = virtualStart >>> 16;
    virtualEnd = split.getEndVirtualOffset();

    bci.seek(virtualStart);
    codec.setInputStream(bci);

    if (BAMInputFormat.DEBUG_BAM_SPLITTER) {
      final long recordStart = virtualStart & 0xffff;
      System.err.println(
          "XXX inizialized BAMRecordReader byte offset: "
              + fileStart
              + " record offset: "
              + recordStart);
    }

    keepReadPairsTogether =
        SortOrder.queryname.equals(header.getSortOrder())
            && conf.getBoolean(BAMInputFormat.KEEP_PAIRED_READS_TOGETHER_PROPERTY, false);
    readPair = false;
    lastOfPair = false;
    intervals = BAMInputFormat.getIntervals(conf);
    if (intervals != null) {
      overlapDetector = new OverlapDetector<>(0, 0);
      overlapDetector.addAll(intervals, intervals);
    }
  }
예제 #2
0
  public BAMSplitGuesser(SeekableStream ss, InputStream headerStream, Configuration conf)
      throws IOException {
    inFile = ss;

    buf = ByteBuffer.allocate(8);
    buf.order(ByteOrder.LITTLE_ENDIAN);

    referenceSequenceCount =
        SAMHeaderReader.readSAMHeaderFrom(headerStream, conf).getSequenceDictionary().size();

    bamCodec = new BAMRecordCodec(null, new LazyBAMRecordFactory());
  }