Example #1
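Opens a SAM/BAM source (stdin or a named input) with silent validation, binds the header and record iterator into a scripting context, evaluates the script, and cleans up in the finally block.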
  private Collection<Throwable> execute_bam(String source) throws IOException {
    SamReader in = null;
    SAMRecordIterator iter = null;
    try {
      SamReaderFactory srf =
          SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
      if (source == null) {
        in = srf.open(SamInputResource.of(stdin()));
      } else {
        in = srf.open(SamInputResource.of(source));
      }
      iter = in.iterator();
      bindings.put("header", in.getFileHeader());
      bindings.put("iter", iter);
      bindings.put("format", "sam");
      this.script.eval(bindings);
      return RETURN_OK;
    } catch (Exception e) {
      LOG.error(e);
      return wrapException(e);
    } finally {
      // Close the iterator before the reader that produced it
      CloserUtil.close(iter);
      CloserUtil.close(in);
      bindings.remove("header");
      bindings.remove("iter");
      bindings.remove("format");
    }
  }
Example #2
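Returns a SamReader for a remote resource: when an index is required, it pairs a buffered seekable stream with its index stream; otherwise it falls back to a plain buffered HTTP stream.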
  public SamReader getSamReader(ResourceLocator locator, boolean requireIndex) throws IOException {

    if (requireIndex) {
      final SamReaderFactory factory =
          SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);

      // Note: 'url' and 'indexed' are fields of the enclosing class
      SeekableStream indexStream = getIndexStream(locator.getBamIndexPath());
      this.indexed = true;

      SeekableStream ss =
          new IGVSeekableBufferedStream(
              IGVSeekableStreamFactory.getInstance().getStreamFor(url), 128000);
      SamInputResource resource = SamInputResource.of(ss).index(indexStream);
      return factory.open(resource);
    } else {
      InputStream is = HttpUtils.getInstance().openConnectionStream(url);
      return new SAMFileReader(new BufferedInputStream(is));
    }
  }
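Example #3

Test verifying that comments added to a BAM appear in the rewritten header with SAMTextHeaderCodec's comment prefix applied.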
  @Test
  public void testAddCommentsToBam() throws Exception {
    final File outputFile =
        File.createTempFile("addCommentsToBamTest.", BamFileIoUtils.BAM_FILE_EXTENSION);
    runIt(BAM_FILE, outputFile, commentList);

    final SAMFileHeader newHeader = SamReaderFactory.makeDefault().getFileHeader(outputFile);

    // The original comments are massaged when they're added to the header. Perform the
    // same massaging here, and then compare the lists.
    final List<String> massagedComments = new LinkedList<>();
    for (final String comment : commentList) {
      massagedComments.add(SAMTextHeaderCodec.COMMENT_PREFIX + comment);
    }

    Assert.assertEquals(newHeader.getComments(), massagedComments);
  }
Example #4
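Reads a BAM from stdin or a file with configurable validation stringency, wraps the record iterator in a progress reporter, and prints the final record count.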
  @Exec
  public void exec() throws IOException, CommandArgumentException {
    if (filename == null) {
      throw new CommandArgumentException("You must specify an input BAM filename!");
    }
    SamReaderFactory readerFactory = SamReaderFactory.makeDefault();
    if (lenient) {
      readerFactory.validationStringency(ValidationStringency.LENIENT);
    } else if (silent) {
      readerFactory.validationStringency(ValidationStringency.SILENT);
    }

    SamReader reader = null;
    String name;
    FileChannel channel = null;
    if (filename.equals("-")) {
      reader = readerFactory.open(SamInputResource.of(System.in));
      name = "<stdin>";
    } else {
      File f = new File(filename);
      FileInputStream fis = new FileInputStream(f);
      channel = fis.getChannel();
      reader = readerFactory.open(SamInputResource.of(fis));
      name = f.getName();
    }

    Iterator<SAMRecord> it =
        ProgressUtils.getIterator(
            name,
            reader.iterator(),
            (channel == null) ? null : new FileChannelStats(channel),
            new ProgressMessage<SAMRecord>() {
              long i = 0;

              @Override
              public String msg(SAMRecord current) {
                i++;
                return i + " " + current.getReadName();
              }
            },
            new CloseableFinalizer<SAMRecord>());
    long i = 0;
    while (it.hasNext()) {
      i++;
      it.next();
    }
    reader.close();
    System.err.println("Successfully read: " + i + " records.");
  }
Example #5
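Command-line doWork that appends single-line comments to a BAM header and rewrites the file with BamFileIoUtils.reheaderBamFile; SAM input is rejected.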
  @Override
  protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    if (INPUT.getAbsolutePath().endsWith(".sam")) {
      throw new UserException("SAM files are not supported");
    }

    final SAMFileHeader samFileHeader =
        SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(INPUT);
    for (final String comment : COMMENT) {
      if (comment.contains("\n")) {
        throw new UserException("Comments can not contain a new line");
      }
      samFileHeader.addComment(comment);
    }

    BamFileIoUtils.reheaderBamFile(samFileHeader, INPUT, OUTPUT, CREATE_MD5_FILE, CREATE_INDEX);

    return null;
  }
Example #6
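Whole-genome coverage metrics: iterates every locus, filters duplicate, low-MAPQ, and unpaired reads, caps coverage at COVERAGE_CAP, and writes coverage and base-quality histograms plus summary metrics.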
  @Override
  protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);

    // Setup all the inputs
    final ProgressLogger progress = new ProgressLogger(log, 10000000, "Processed", "loci");
    final ReferenceSequenceFileWalker refWalker =
        new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final SamReader in =
        SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final SamLocusIterator iterator = getLocusIterator(in);

    final List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>();
    final CountingFilter dupeFilter = new CountingDuplicateFilter();
    final CountingFilter mapqFilter = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
    final CountingPairedFilter pairFilter = new CountingPairedFilter();
    filters.add(mapqFilter);
    filters.add(dupeFilter);
    if (!COUNT_UNPAIRED) {
      filters.add(pairFilter);
    }
    filters.add(
        new SecondaryAlignmentFilter()); // Not a counting filter because we never want to count
                                         // reads twice
    iterator.setSamFilters(filters);
    iterator.setEmitUncoveredLoci(true);
    iterator.setMappingQualityScoreCutoff(0); // Handled separately because we want to count bases
    iterator.setQualityScoreCutoff(0); // Handled separately because we want to count bases
    iterator.setIncludeNonPfReads(false);

    final int max = COVERAGE_CAP;
    final long[] HistogramArray = new long[max + 1];
    final long[] baseQHistogramArray = new long[Byte.MAX_VALUE];
    final boolean usingStopAfter = STOP_AFTER > 0;
    final long stopAfter = STOP_AFTER - 1;
    long counter = 0;

    long basesExcludedByBaseq = 0;
    long basesExcludedByOverlap = 0;
    long basesExcludedByCapping = 0;

    // Loop through all the loci
    while (iterator.hasNext()) {
      final SamLocusIterator.LocusInfo info = iterator.next();

      // Check that the reference is not N
      final ReferenceSequence ref = refWalker.get(info.getSequenceIndex());
      final byte base = ref.getBases()[info.getPosition() - 1];
      if (base == 'N') continue;

      // Figure out the coverage while not counting overlapping reads twice, and excluding various
      // things
      final HashSet<String> readNames = new HashSet<String>(info.getRecordAndPositions().size());
      int pileupSize = 0;
      for (final SamLocusIterator.RecordAndOffset recs : info.getRecordAndPositions()) {

        if (recs.getBaseQuality() < MINIMUM_BASE_QUALITY) {
          ++basesExcludedByBaseq;
          continue;
        }
        if (!readNames.add(recs.getRecord().getReadName())) {
          ++basesExcludedByOverlap;
          continue;
        }
        pileupSize++;
        if (pileupSize <= max) {
          baseQHistogramArray[recs.getRecord().getBaseQualities()[recs.getOffset()]]++;
        }
      }

      final int depth = Math.min(readNames.size(), max);
      if (depth < readNames.size()) basesExcludedByCapping += readNames.size() - max;
      HistogramArray[depth]++;

      // Record progress and perhaps stop
      progress.record(info.getSequenceName(), info.getPosition());
      if (usingStopAfter && ++counter > stopAfter) break;
    }

    // Construct and write the outputs
    final Histogram<Integer> histo = new Histogram<Integer>("coverage", "count");
    for (int i = 0; i < HistogramArray.length; ++i) {
      histo.increment(i, HistogramArray[i]);
    }

    // Construct and write the base quality histogram
    final Histogram<Integer> baseQHisto = new Histogram<Integer>("value", "baseq_count");
    for (int i = 0; i < baseQHistogramArray.length; ++i) {
      baseQHisto.increment(i, baseQHistogramArray[i]);
    }

    final WgsMetrics metrics = generateWgsMetrics();
    metrics.GENOME_TERRITORY = (long) histo.getSumOfValues();
    metrics.MEAN_COVERAGE = histo.getMean();
    metrics.SD_COVERAGE = histo.getStandardDeviation();
    metrics.MEDIAN_COVERAGE = histo.getMedian();
    metrics.MAD_COVERAGE = histo.getMedianAbsoluteDeviation();

    final long basesExcludedByDupes = getBasesExcludedBy(dupeFilter);
    final long basesExcludedByMapq = getBasesExcludedBy(mapqFilter);
    final long basesExcludedByPairing = getBasesExcludedBy(pairFilter);
    final double total = histo.getSum();
    final double totalWithExcludes =
        total
            + basesExcludedByDupes
            + basesExcludedByMapq
            + basesExcludedByPairing
            + basesExcludedByBaseq
            + basesExcludedByOverlap
            + basesExcludedByCapping;
    metrics.PCT_EXC_DUPE = basesExcludedByDupes / totalWithExcludes;
    metrics.PCT_EXC_MAPQ = basesExcludedByMapq / totalWithExcludes;
    metrics.PCT_EXC_UNPAIRED = basesExcludedByPairing / totalWithExcludes;
    metrics.PCT_EXC_BASEQ = basesExcludedByBaseq / totalWithExcludes;
    metrics.PCT_EXC_OVERLAP = basesExcludedByOverlap / totalWithExcludes;
    metrics.PCT_EXC_CAPPED = basesExcludedByCapping / totalWithExcludes;
    metrics.PCT_EXC_TOTAL = (totalWithExcludes - total) / totalWithExcludes;

    metrics.PCT_1X =
        MathUtil.sum(HistogramArray, 1, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_5X =
        MathUtil.sum(HistogramArray, 5, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_10X =
        MathUtil.sum(HistogramArray, 10, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_15X =
        MathUtil.sum(HistogramArray, 15, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_20X =
        MathUtil.sum(HistogramArray, 20, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_25X =
        MathUtil.sum(HistogramArray, 25, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_30X =
        MathUtil.sum(HistogramArray, 30, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_40X =
        MathUtil.sum(HistogramArray, 40, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_50X =
        MathUtil.sum(HistogramArray, 50, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_60X =
        MathUtil.sum(HistogramArray, 60, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_70X =
        MathUtil.sum(HistogramArray, 70, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_80X =
        MathUtil.sum(HistogramArray, 80, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_90X =
        MathUtil.sum(HistogramArray, 90, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_100X =
        MathUtil.sum(HistogramArray, 100, HistogramArray.length)
            / (double) metrics.GENOME_TERRITORY;

    final MetricsFile<WgsMetrics, Integer> out = getMetricsFile();
    out.addMetric(metrics);
    out.addHistogram(histo);
    if (INCLUDE_BQ_HISTOGRAM) {
      out.addHistogram(baseQHisto);
    }
    out.write(OUTPUT);

    return 0;
  }
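Example #7

Quality-score accuracy metrics: compares each read base against the reference inferred from the alignment, tallying mismatches per predicted quality and per substitution type, and reports Phred-scaled observed error rates.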
  /**
   * Main method for the program. Checks that all input files are present and readable and that the
   * output file can be written to. Then iterates through all the records accumulating metrics.
   * Finally, writes the metrics file.
   */
  protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SamReader reader =
        SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);

    final Histogram<Integer> mismatchesHist = new Histogram<Integer>("Predicted", "Mismatches");
    final Histogram<Integer> totalHist = new Histogram<Integer>("Predicted", "Total_Bases");
    final Map<String, Histogram<Integer>> mismatchesByTypeHist =
        new HashMap<String, Histogram<Integer>>();
    final Map<String, Histogram<Integer>> totalByTypeHist =
        new HashMap<String, Histogram<Integer>>();

    // Set up the histograms: for each base, one "base>" (mismatch from) and one ">base"
    // (mismatch to) histogram, in both the mismatch and total maps
    final byte[] bases = {'A', 'C', 'G', 'T'};
    for (final byte base : bases) {
      mismatchesByTypeHist.put(
          (char) base + ">", new Histogram<Integer>("Predicted", (char) base + ">"));
      mismatchesByTypeHist.put(
          ">" + (char) base, new Histogram<Integer>("Predicted", ">" + (char) base));
      totalByTypeHist.put(
          (char) base + ">", new Histogram<Integer>("Predicted", (char) base + ">"));
      totalByTypeHist.put(
          ">" + (char) base, new Histogram<Integer>("Predicted", ">" + (char) base));
    }

    for (final SAMRecord record : reader) {
      // Ignore these as we don't know the truth
      if (record.getReadUnmappedFlag() || record.isSecondaryOrSupplementary()) {
        continue;
      }
      final byte[] readBases = record.getReadBases();
      final byte[] readQualities = record.getBaseQualities();
      final byte[] refBases = SequenceUtil.makeReferenceFromAlignment(record, false);

      // We've seen stranger things
      if (readQualities.length != readBases.length) {
        throw new PicardException(
            "Missing Qualities ("
                + readQualities.length
                + ","
                + readBases.length
                + ") : "
                + record.getSAMString());
      }

      if (refBases.length != readBases.length) {
        throw new PicardException(
            "The read length did not match the inferred reference length, please check your MD and CIGAR.");
      }

      int cycleIndex; // zero-based
      if (record.getReadNegativeStrandFlag()) {
        cycleIndex = readBases.length - 1 + CYCLE_OFFSET;
      } else {
        cycleIndex = CYCLE_OFFSET;
      }

      for (int i = 0; i < readBases.length; i++) {
        if (-1 == CYCLE || cycleIndex == CYCLE) {
          if ('-' != refBases[i] && '0' != refBases[i]) { // not insertion and not soft-clipped
            if (!SequenceUtil.basesEqual(readBases[i], refBases[i])) { // mismatch
              mismatchesHist.increment((int) readQualities[i]);
              if (SequenceUtil.isValidBase(refBases[i])) {
                mismatchesByTypeHist
                    .get((char) refBases[i] + ">")
                    .increment((int) readQualities[i]);
              }
              if (SequenceUtil.isValidBase(readBases[i])) {
                mismatchesByTypeHist
                    .get(">" + (char) readBases[i])
                    .increment((int) readQualities[i]);
              }
            } else {
              mismatchesHist.increment(
                  (int) readQualities[i], 0); // to make sure the bin will exist
            }
            totalHist.increment((int) readQualities[i]);
            if (SequenceUtil.isValidBase(readBases[i])) {
              totalByTypeHist.get(">" + (char) readBases[i]).increment((int) readQualities[i]);
            }
            if (SequenceUtil.isValidBase(refBases[i])) {
              totalByTypeHist.get((char) refBases[i] + ">").increment((int) readQualities[i]);
            }
          }
        }
        cycleIndex += record.getReadNegativeStrandFlag() ? -1 : 1;
      }
    }
    CloserUtil.close(reader);

    final Histogram<Integer> hist = new Histogram<Integer>("Predicted", "Observed");

    double sumOfSquaresError = 0.0;

    // compute the aggregate phred values
    for (final Integer key : mismatchesHist.keySet()) {
      final double numMismatches = mismatchesHist.get(key).getValue();
      final double numBases = totalHist.get(key).getValue();
      final double phredErr = Math.log10(numMismatches / numBases) * -10.0;
      sumOfSquaresError += (0 == numMismatches) ? 0.0 : (key - phredErr) * (key - phredErr);
      hist.increment(key, phredErr);

      // make sure the bin will exist
      for (final byte base : bases) {
        mismatchesByTypeHist.get(">" + (char) base).increment(key, 0.0);
        mismatchesByTypeHist.get((char) base + ">").increment(key, 0.0);
        totalByTypeHist.get(">" + (char) base).increment(key, 0.0);
        totalByTypeHist.get((char) base + ">").increment(key, 0.0);
      }
    }

    final QualityScoreAccuracyMetrics metrics = new QualityScoreAccuracyMetrics();
    metrics.SUM_OF_SQUARE_ERROR = sumOfSquaresError;

    final MetricsFile<QualityScoreAccuracyMetrics, Integer> out = getMetricsFile();
    out.addMetric(metrics);
    out.addHistogram(hist);
    for (final byte base : bases) {
      // >base : histograms for mismatches *to* the given base
      Histogram<Integer> m = mismatchesByTypeHist.get(">" + (char) base);
      Histogram<Integer> t = totalByTypeHist.get(">" + (char) base);
      Histogram<Integer> h = new Histogram<Integer>(m.getBinLabel(), m.getValueLabel());
      for (final Integer key : m.keySet()) {
        final double numMismatches = m.get(key).getValue();
        final double numBases = t.get(key).getValue();
        final double phredErr = Math.log10(numMismatches / numBases) * -10.0;
        h.increment(key, phredErr);
      }
      out.addHistogram(h);

      // base> : histograms for mismatches *from* the given base
      m = mismatchesByTypeHist.get((char) base + ">");
      t = totalByTypeHist.get(">" + (char) base);
      h = new Histogram<Integer>(m.getBinLabel(), m.getValueLabel());
      for (final Integer key : m.keySet()) {
        final double numMismatches = m.get(key).getValue();
        final double numBases = t.get(key).getValue();
        final double phredErr = Math.log10(numMismatches / numBases) * -10.0;
        h.increment(key, phredErr);
      }
      out.addHistogram(h);
    }

    out.addHistogram(mismatchesHist);
    out.addHistogram(totalHist);
    out.write(OUTPUT);

    return 0;
  }
Example #8
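GetOpt-driven command-line tool that opens a BAM (and optionally an indexed FASTA reference), parses a region string into an Interval, and renders it with a TView ASCII handler.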
  @Override
  public int doWork(String[] args) {
    String region = null;
    com.github.lindenb.jvarkit.util.cli.GetOpt opt =
        new com.github.lindenb.jvarkit.util.cli.GetOpt();
    int c;
    while ((c = opt.getopt(args, getGetOptDefault() + "r:R:")) != -1) {
      switch (c) {
        case 'r':
          region = opt.getOptArg();
          break;
        case 'R':
          this.referenceFile = new File(opt.getOptArg());
          break;
        default:
          {
            switch (handleOtherOptions(c, opt, args)) {
              case EXIT_FAILURE:
                return -1;
              case EXIT_SUCCESS:
                return 0;
              default:
                break;
            }
          }
      }
    }

    File faidx = null;
    File bamFile = null;

    if (opt.getOptInd() + 1 == args.length) {
      bamFile = new File(args[opt.getOptInd()]);
    } else if (opt.getOptInd() + 2 == args.length) {
      bamFile = new File(args[opt.getOptInd()]);
      faidx = new File(args[opt.getOptInd() + 1]);
    } else {
      System.err.println("Illegal Number of arguments.");
      return -1;
    }

    IndexedFastaSequenceFile ref = null;
    PrintWriter out = new PrintWriter(System.out);
    SamReader samReader = null;
    try {
      info("opening " + bamFile);
      SamReaderFactory srf =
          SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
      samReader = srf.open(bamFile);

      Interval interval =
          IntervalUtils.parseOne(samReader.getFileHeader().getSequenceDictionary(), region);
      if (interval == null) {
        error("Bad interval " + interval);
        return -1;
      }
      if (faidx != null) {
        ref = new IndexedFastaSequenceFile(faidx);
      }

      TViewHandler handler = new AsciiHandler();
      new TView().execute(samReader, ref, interval, handler);
    } catch (Exception e) {
      e.printStackTrace();
      return -1;
    } finally {
      out.flush();
      CloserUtil.close(samReader);
      CloserUtil.close(ref);
    }
    return 0;
  }
Example #9
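Merges multiple SAM/BAM inputs, optionally restricted to an interval list, sharing identical sequence dictionaries to save memory and sorting via a temp directory only when the input sort orders don't already match the requested output order.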
  /** Combines multiple SAM/BAM files into one. */
  @Override
  protected int doWork() {
    boolean matchedSortOrders = true;

    // read interval list if it is defined
    final List<Interval> intervalList =
        (INTERVALS == null ? null : IntervalList.fromFile(INTERVALS).uniqued().getIntervals());
    // map reader->iterator used if INTERVALS is defined
    final Map<SamReader, CloseableIterator<SAMRecord>> samReaderToIterator =
        new HashMap<SamReader, CloseableIterator<SAMRecord>>(INPUT.size());

    // Open the files for reading and writing
    final List<SamReader> readers = new ArrayList<SamReader>();
    final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
    {
      SAMSequenceDictionary dict = null; // Used to try and reduce redundant SDs in memory

      for (final File inFile : INPUT) {
        IOUtil.assertFileIsReadable(inFile);
        final SamReader in =
            SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
        if (INTERVALS != null) {
          if (!in.hasIndex())
            throw new PicardException(
                "Merging with interval but Bam file is not indexed " + inFile);
          final CloseableIterator<SAMRecord> samIterator =
              new SamRecordIntervalIteratorFactory()
                  .makeSamRecordIntervalIterator(in, intervalList, true);
          samReaderToIterator.put(in, samIterator);
        }

        readers.add(in);
        headers.add(in.getFileHeader());

        // A slightly hackish attempt to keep memory consumption down when merging multiple
        // files with large sequence dictionaries (10,000s of sequences). If the dictionaries
        // are identical, then replace the duplicate copies with a single dictionary to
        // reduce the memory footprint.
        if (dict == null) {
          dict = in.getFileHeader().getSequenceDictionary();
        } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
          in.getFileHeader().setSequenceDictionary(dict);
        }

        matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
      }
    }

    // If all the input sort orders match the output sort order then just merge them and
    // write on the fly, otherwise setup to merge and sort before writing out the final file
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean presorted;
    final SAMFileHeader.SortOrder headerMergerSortOrder;
    final boolean mergingSamRecordIteratorAssumeSorted;

    if (matchedSortOrders
        || SORT_ORDER == SAMFileHeader.SortOrder.unsorted
        || ASSUME_SORTED
        || INTERVALS != null) {
      log.info(
          "Input files are in same order as output so sorting to temp directory is not needed.");
      headerMergerSortOrder = SORT_ORDER;
      mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
      presorted = true;
    } else {
      log.info("Sorting input files using temp directory " + TMP_DIR);
      headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
      mergingSamRecordIteratorAssumeSorted = false;
      presorted = false;
    }
    final SamFileHeaderMerger headerMerger =
        new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
    final MergingSamRecordIterator iterator;
    // no interval defined, get an iterator for the whole bam
    if (intervalList == null) {
      iterator =
          new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
    } else {
      // show warning related to https://github.com/broadinstitute/picard/pull/314/files
      log.info(
          "Warning: merged bams from different interval lists may contain the same read in both files");
      iterator = new MergingSamRecordIterator(headerMerger, samReaderToIterator, true);
    }
    final SAMFileHeader header = headerMerger.getMergedHeader();
    for (final String comment : COMMENT) {
      header.addComment(comment);
    }
    header.setSortOrder(SORT_ORDER);
    final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
    if (USE_THREADING) {
      samFileWriterFactory.setUseAsyncIo(true);
    }
    final SAMFileWriter out = samFileWriterFactory.makeSAMOrBAMWriter(header, presorted, OUTPUT);

    // Lastly loop through and write out the records
    final ProgressLogger progress = new ProgressLogger(log, PROGRESS_INTERVAL);
    while (iterator.hasNext()) {
      final SAMRecord record = iterator.next();
      out.addAlignment(record);
      progress.record(record);
    }

    log.info("Finished reading inputs.");
    for (final CloseableIterator<SAMRecord> iter : samReaderToIterator.values())
      CloserUtil.close(iter);
    CloserUtil.close(readers);
    out.close();
    return 0;
  }
Example #10
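Test that MarkDuplicates creates (or suppresses) PG header records when merging inputs and that each read's PG chain has the expected program names and versions.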
  /**
   * Test that PG header records are created & chained appropriately (or not created), and that the
   * PG record chains are as expected. MarkDuplicates is used both to merge and to mark dupes in
   * this case.
   *
   * @param suppressPg If true, do not create PG header record.
   * @param expectedPnVnByReadName For each read, info about the expected chain of PG records.
   */
  @Test(dataProvider = "pgRecordChainingTest")
  public void pgRecordChainingTest(
      final boolean suppressPg, final Map<String, List<ExpectedPnAndVn>> expectedPnVnByReadName) {
    final File outputDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp");
    outputDir.deleteOnExit();
    try {
      // Run MarkDuplicates, merging the 3 input files, and either enabling or suppressing PG header
      // record creation according to suppressPg.
      final MarkDuplicates markDuplicates = new MarkDuplicates();
      final ArrayList<String> args = new ArrayList<String>();
      for (int i = 1; i <= 3; ++i) {
        args.add("INPUT=" + new File(TEST_DATA_DIR, "merge" + i + ".sam").getAbsolutePath());
      }
      final File outputSam = new File(outputDir, TEST_BASE_NAME + ".sam");
      args.add("OUTPUT=" + outputSam.getAbsolutePath());
      args.add(
          "METRICS_FILE="
              + new File(outputDir, TEST_BASE_NAME + ".duplicate_metrics").getAbsolutePath());
      if (suppressPg) args.add("PROGRAM_RECORD_ID=null");

      // I generally prefer to call doWork rather than invoking the argument parser, but it
      // is necessary in this case to initialize the command line.
      // Note that for the unit test, version won't come through because it is obtained from
      // the jar manifest, and the unit test doesn't run code from a jar.
      Assert.assertEquals(markDuplicates.instanceMain(args.toArray(new String[args.size()])), 0);

      // Read the MarkDuplicates output file, and get the PG ID for each read. In this
      // particular test, the PG ID should be the same for both ends of a pair.
      final SamReader reader = SamReaderFactory.makeDefault().open(outputSam);

      final Map<String, String> pgIdForReadName = new HashMap<String, String>();
      for (final SAMRecord rec : reader) {
        final String existingPgId = pgIdForReadName.get(rec.getReadName());
        final String thisPgId = rec.getStringAttribute(SAMTag.PG.name());
        if (existingPgId != null) {
          Assert.assertEquals(thisPgId, existingPgId);
        } else {
          pgIdForReadName.put(rec.getReadName(), thisPgId);
        }
      }
      final SAMFileHeader header = reader.getFileHeader();
      CloserUtil.close(reader);

      // Confirm that for each read name, the chain of PG records contains exactly the
      // number that is expected, and that values in the PG chain are as expected.
      for (final Map.Entry<String, List<ExpectedPnAndVn>> entry :
          expectedPnVnByReadName.entrySet()) {
        final String readName = entry.getKey();
        final List<ExpectedPnAndVn> expectedList = entry.getValue();
        String pgId = pgIdForReadName.get(readName);
        for (final ExpectedPnAndVn expected : expectedList) {
          final SAMProgramRecord programRecord = header.getProgramRecord(pgId);
          if (expected.expectedPn != null)
            Assert.assertEquals(programRecord.getProgramName(), expected.expectedPn);
          if (expected.expectedVn != null)
            Assert.assertEquals(programRecord.getProgramVersion(), expected.expectedVn);
          pgId = programRecord.getPreviousProgramGroupId();
        }
        Assert.assertNull(pgId);
      }

    } finally {
      TestUtil.recursiveDelete(outputDir);
    }
  }
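Example #11

Constructor that opens a reader over a BAM file and sets up the record iterator and a queue; reader, iterator, and queue are fields of the enclosing class.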
  private BAMProcessorImplDict(File in) {
    reader = SamReaderFactory.makeDefault().open(in);
    iterator = reader.iterator();
    queue = new LinkedList<SAMRecord>();
  }