Esempio n. 1
0
  /**
   * Combines multiple SAM/BAM files into one.
   *
   * <p>Every file in {@code INPUT} is opened and its header collected. When {@code INTERVALS} is
   * set, each input must be indexed and only records from the (uniqued) intervals are read.
   * Records are written straight through when no re-sorting is needed; otherwise the writer is
   * created as not presorted so that it sorts the records itself before producing {@code OUTPUT}.
   *
   * <p>Input readers and interval iterators are closed whether or not the merge succeeds. The
   * output writer is only closed after a successful merge, so a failed run does not finalize a
   * partially written output file.
   *
   * @return 0 once all records have been written and the output has been closed
   * @throws PicardException if {@code INTERVALS} is set and an input file has no index
   */
  @Override
  protected int doWork() {
    boolean matchedSortOrders = true;

    // read interval list if it is defined
    final List<Interval> intervalList =
        (INTERVALS == null ? null : IntervalList.fromFile(INTERVALS).uniqued().getIntervals());
    // map reader->iterator used if INTERVALS is defined
    final Map<SamReader, CloseableIterator<SAMRecord>> samReaderToIterator =
        new HashMap<SamReader, CloseableIterator<SAMRecord>>(INPUT.size());

    // Open the files for reading and writing
    final List<SamReader> readers = new ArrayList<SamReader>();
    final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();

    final SAMFileWriter out;
    try {
      SAMSequenceDictionary dict = null; // Used to try and reduce redundant SDs in memory

      for (final File inFile : INPUT) {
        IOUtil.assertFileIsReadable(inFile);
        final SamReader in =
            SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
        // Register the reader right away so the finally block below closes it even if one of
        // the following steps (e.g. the index check) throws.
        readers.add(in);

        if (INTERVALS != null) {
          if (!in.hasIndex()) {
            throw new PicardException(
                "Merging with interval but Bam file is not indexed " + inFile);
          }
          final CloseableIterator<SAMRecord> samIterator =
              new SamRecordIntervalIteratorFactory()
                  .makeSamRecordIntervalIterator(in, intervalList, true);
          samReaderToIterator.put(in, samIterator);
        }

        headers.add(in.getFileHeader());

        // A slightly hackish attempt to keep memory consumption down when merging multiple
        // files with large sequence dictionaries (10,000s of sequences). If the dictionaries
        // are identical, then replace the duplicate copies with a single dictionary to reduce
        // the memory footprint.
        if (dict == null) {
          dict = in.getFileHeader().getSequenceDictionary();
        } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
          in.getFileHeader().setSequenceDictionary(dict);
        }

        matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
      }

      // If all the input sort orders match the output sort order then just merge them and
      // write on the fly, otherwise setup to merge and sort before writing out the final file
      IOUtil.assertFileIsWritable(OUTPUT);
      final boolean presorted;
      final SAMFileHeader.SortOrder headerMergerSortOrder;
      final boolean mergingSamRecordIteratorAssumeSorted;

      if (matchedSortOrders
          || SORT_ORDER == SAMFileHeader.SortOrder.unsorted
          || ASSUME_SORTED
          || INTERVALS != null) {
        log.info(
            "Input files are in same order as output so sorting to temp directory is not needed.");
        headerMergerSortOrder = SORT_ORDER;
        mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
        presorted = true;
      } else {
        log.info("Sorting input files using temp directory " + TMP_DIR);
        headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
        mergingSamRecordIteratorAssumeSorted = false;
        presorted = false;
      }

      final SamFileHeaderMerger headerMerger =
          new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
      final MergingSamRecordIterator iterator;
      if (intervalList == null) {
        // no interval defined, get an iterator for the whole bam
        iterator =
            new MergingSamRecordIterator(
                headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
      } else {
        // show warning related to https://github.com/broadinstitute/picard/pull/314/files
        log.info(
            "Warning: merged bams from different interval lists may contain the same read in both files");
        iterator = new MergingSamRecordIterator(headerMerger, samReaderToIterator, true);
      }

      final SAMFileHeader header = headerMerger.getMergedHeader();
      for (final String comment : COMMENT) {
        header.addComment(comment);
      }
      // The merged header always advertises the requested output order, even when the header
      // merger itself was built as unsorted for the sort-on-write path.
      header.setSortOrder(SORT_ORDER);

      final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
      if (USE_THREADING) {
        samFileWriterFactory.setUseAsyncIo(true);
      }
      out = samFileWriterFactory.makeSAMOrBAMWriter(header, presorted, OUTPUT);

      // Lastly loop through and write out the records
      final ProgressLogger progress = new ProgressLogger(log, PROGRESS_INTERVAL);
      while (iterator.hasNext()) {
        final SAMRecord record = iterator.next();
        out.addAlignment(record);
        progress.record(record);
      }

      log.info("Finished reading inputs.");
    } finally {
      // Release the inputs on success and on failure alike; iterators first, then their readers.
      for (final CloseableIterator<SAMRecord> iter : samReaderToIterator.values()) {
        CloserUtil.close(iter);
      }
      CloserUtil.close(readers);
    }

    // Only reached when the merge succeeded: finalize the output file.
    out.close();
    return 0;
  }