private Collection<Throwable> execute_bam(String source) throws IOException {
    SamReader in = null;
    SAMRecordIterator iter = null;
    try {
        SamReaderFactory srf =
            SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
        if (source == null) {
            in = srf.open(SamInputResource.of(stdin()));
        } else {
            in = srf.open(SamInputResource.of(source));
        }
        iter = in.iterator();
        bindings.put("header", in.getFileHeader());
        bindings.put("iter", iter);
        bindings.put("format", "sam");
        this.script.eval(bindings);
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return wrapException(e);
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(iter);
        bindings.remove("header");
        bindings.remove("iter");
        bindings.remove("format");
    }
}
public SamReader getSamReader(ResourceLocator locator, boolean requireIndex) throws IOException {
    if (requireIndex) {
        final SamReaderFactory factory =
            SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
        SeekableStream indexStream = getIndexStream(locator.getBamIndexPath());
        this.indexed = true;
        SeekableStream ss =
            new IGVSeekableBufferedStream(
                IGVSeekableStreamFactory.getInstance().getStreamFor(url), 128000);
        SamInputResource resource = SamInputResource.of(ss).index(indexStream);
        return factory.open(resource);
    } else {
        InputStream is = HttpUtils.getInstance().openConnectionStream(url);
        return new SAMFileReader(new BufferedInputStream(is));
    }
}
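The snippet above wires an explicit index stream over HTTP; as a minimal sketch of the simpler, common case where a local .bai index sits next to the BAM, an indexed region query with htsjdk might look like the following (the file name and coordinates are illustrative placeholders, not taken from the snippets above):

// Minimal sketch: open a coordinate-sorted, indexed BAM and query one region.
// "example.bam" and the interval are placeholders; the .bai index is assumed to
// sit next to the BAM so SamReaderFactory can locate it automatically.
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import java.io.File;

public class QueryExample {
    public static void main(String[] args) throws Exception {
        final SamReaderFactory factory =
            SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
        try (SamReader reader = factory.open(new File("example.bam"));
             SAMRecordIterator it = reader.query("chr1", 10_000, 20_000, false)) {
            while (it.hasNext()) {
                final SAMRecord rec = it.next();
                System.out.println(rec.getReadName() + "\t" + rec.getContig() + ":" + rec.getAlignmentStart());
            }
        }
    }
}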
@Test
public void testAddCommentsToBam() throws Exception {
    final File outputFile =
        File.createTempFile("addCommentsToBamTest.", BamFileIoUtils.BAM_FILE_EXTENSION);
    runIt(BAM_FILE, outputFile, commentList);
    final SAMFileHeader newHeader = SamReaderFactory.makeDefault().getFileHeader(outputFile);

    // The original comments are massaged when they're added to the header. Perform the same
    // massaging here, and then compare the lists.
    final List<String> massagedComments = new LinkedList<>();
    for (final String comment : commentList) {
        massagedComments.add(SAMTextHeaderCodec.COMMENT_PREFIX + comment);
    }

    Assert.assertEquals(newHeader.getComments(), massagedComments);
}
@Exec
public void exec() throws IOException, CommandArgumentException {
    if (filename == null) {
        throw new CommandArgumentException("You must specify an input BAM filename!");
    }

    SamReaderFactory readerFactory = SamReaderFactory.makeDefault();
    if (lenient) {
        readerFactory.validationStringency(ValidationStringency.LENIENT);
    } else if (silent) {
        readerFactory.validationStringency(ValidationStringency.SILENT);
    }

    SamReader reader = null;
    String name;
    FileChannel channel = null;
    if (filename.equals("-")) {
        reader = readerFactory.open(SamInputResource.of(System.in));
        name = "<stdin>";
    } else {
        File f = new File(filename);
        FileInputStream fis = new FileInputStream(f);
        channel = fis.getChannel();
        reader = readerFactory.open(SamInputResource.of(fis));
        name = f.getName();
    }

    Iterator<SAMRecord> it =
        ProgressUtils.getIterator(
            name,
            reader.iterator(),
            (channel == null) ? null : new FileChannelStats(channel),
            new ProgressMessage<SAMRecord>() {
                long i = 0;

                @Override
                public String msg(SAMRecord current) {
                    i++;
                    return i + " " + current.getReadName();
                }
            },
            new CloseableFinalizer<SAMRecord>());

    long i = 0;
    while (it.hasNext()) {
        i++;
        it.next();
    }
    reader.close();
    System.err.println("Successfully read: " + i + " records.");
}
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    if (INPUT.getAbsolutePath().endsWith(".sam")) {
        throw new UserException("SAM files are not supported");
    }

    final SAMFileHeader samFileHeader =
        SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(INPUT);
    for (final String comment : COMMENT) {
        if (comment.contains("\n")) {
            throw new UserException("Comments cannot contain a new line");
        }
        samFileHeader.addComment(comment);
    }

    BamFileIoUtils.reheaderBamFile(samFileHeader, INPUT, OUTPUT, CREATE_MD5_FILE, CREATE_INDEX);
    return null;
}
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);

    // Setup all the inputs
    final ProgressLogger progress = new ProgressLogger(log, 10000000, "Processed", "loci");
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final SamReader in =
        SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final SamLocusIterator iterator = getLocusIterator(in);

    final List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>();
    final CountingFilter dupeFilter = new CountingDuplicateFilter();
    final CountingFilter mapqFilter = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
    final CountingPairedFilter pairFilter = new CountingPairedFilter();
    filters.add(mapqFilter);
    filters.add(dupeFilter);
    if (!COUNT_UNPAIRED) {
        filters.add(pairFilter);
    }
    // Not a counting filter because we never want to count reads twice
    filters.add(new SecondaryAlignmentFilter());
    iterator.setSamFilters(filters);
    iterator.setEmitUncoveredLoci(true);
    iterator.setMappingQualityScoreCutoff(0); // Handled separately because we want to count bases
    iterator.setQualityScoreCutoff(0); // Handled separately because we want to count bases
    iterator.setIncludeNonPfReads(false);

    final int max = COVERAGE_CAP;
    final long[] HistogramArray = new long[max + 1];
    final long[] baseQHistogramArray = new long[Byte.MAX_VALUE];
    final boolean usingStopAfter = STOP_AFTER > 0;
    final long stopAfter = STOP_AFTER - 1;
    long counter = 0;

    long basesExcludedByBaseq = 0;
    long basesExcludedByOverlap = 0;
    long basesExcludedByCapping = 0;

    // Loop through all the loci
    while (iterator.hasNext()) {
        final SamLocusIterator.LocusInfo info = iterator.next();

        // Check that the reference is not N
        final ReferenceSequence ref = refWalker.get(info.getSequenceIndex());
        final byte base = ref.getBases()[info.getPosition() - 1];
        if (base == 'N') continue;

        // Figure out the coverage while not counting overlapping reads twice, and excluding
        // various things
        final HashSet<String> readNames = new HashSet<String>(info.getRecordAndPositions().size());
        int pileupSize = 0;
        for (final SamLocusIterator.RecordAndOffset recs : info.getRecordAndPositions()) {
            if (recs.getBaseQuality() < MINIMUM_BASE_QUALITY) {
                ++basesExcludedByBaseq;
                continue;
            }
            if (!readNames.add(recs.getRecord().getReadName())) {
                ++basesExcludedByOverlap;
                continue;
            }
            pileupSize++;
            if (pileupSize <= max) {
                baseQHistogramArray[recs.getRecord().getBaseQualities()[recs.getOffset()]]++;
            }
        }

        final int depth = Math.min(readNames.size(), max);
        if (depth < readNames.size()) basesExcludedByCapping += readNames.size() - max;
        HistogramArray[depth]++;

        // Record progress and perhaps stop
        progress.record(info.getSequenceName(), info.getPosition());
        if (usingStopAfter && ++counter > stopAfter) break;
    }

    // Construct and write the outputs
    final Histogram<Integer> histo = new Histogram<Integer>("coverage", "count");
    for (int i = 0; i < HistogramArray.length; ++i) {
        histo.increment(i, HistogramArray[i]);
    }

    final Histogram<Integer> baseQHisto = new Histogram<Integer>("value", "baseq_count");
    for (int i = 0; i < baseQHistogramArray.length; ++i) {
        baseQHisto.increment(i, baseQHistogramArray[i]);
    }

    final WgsMetrics metrics = generateWgsMetrics();
    metrics.GENOME_TERRITORY = (long) histo.getSumOfValues();
    metrics.MEAN_COVERAGE = histo.getMean();
    metrics.SD_COVERAGE = histo.getStandardDeviation();
    metrics.MEDIAN_COVERAGE = histo.getMedian();
    metrics.MAD_COVERAGE = histo.getMedianAbsoluteDeviation();

    final long basesExcludedByDupes = getBasesExcludedBy(dupeFilter);
    final long basesExcludedByMapq = getBasesExcludedBy(mapqFilter);
    final long basesExcludedByPairing = getBasesExcludedBy(pairFilter);
    final double total = histo.getSum();
    final double totalWithExcludes =
        total + basesExcludedByDupes + basesExcludedByMapq + basesExcludedByPairing
            + basesExcludedByBaseq + basesExcludedByOverlap + basesExcludedByCapping;

    metrics.PCT_EXC_DUPE = basesExcludedByDupes / totalWithExcludes;
    metrics.PCT_EXC_MAPQ = basesExcludedByMapq / totalWithExcludes;
    metrics.PCT_EXC_UNPAIRED = basesExcludedByPairing / totalWithExcludes;
    metrics.PCT_EXC_BASEQ = basesExcludedByBaseq / totalWithExcludes;
    metrics.PCT_EXC_OVERLAP = basesExcludedByOverlap / totalWithExcludes;
    metrics.PCT_EXC_CAPPED = basesExcludedByCapping / totalWithExcludes;
    metrics.PCT_EXC_TOTAL = (totalWithExcludes - total) / totalWithExcludes;

    metrics.PCT_1X = MathUtil.sum(HistogramArray, 1, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_5X = MathUtil.sum(HistogramArray, 5, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_10X = MathUtil.sum(HistogramArray, 10, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_15X = MathUtil.sum(HistogramArray, 15, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_20X = MathUtil.sum(HistogramArray, 20, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_25X = MathUtil.sum(HistogramArray, 25, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_30X = MathUtil.sum(HistogramArray, 30, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_40X = MathUtil.sum(HistogramArray, 40, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_50X = MathUtil.sum(HistogramArray, 50, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_60X = MathUtil.sum(HistogramArray, 60, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_70X = MathUtil.sum(HistogramArray, 70, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_80X = MathUtil.sum(HistogramArray, 80, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_90X = MathUtil.sum(HistogramArray, 90, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_100X = MathUtil.sum(HistogramArray, 100, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;

    final MetricsFile<WgsMetrics, Integer> out = getMetricsFile();
    out.addMetric(metrics);
    out.addHistogram(histo);
    if (INCLUDE_BQ_HISTOGRAM) {
        out.addHistogram(baseQHisto);
    }
    out.write(OUTPUT);

    return 0;
}
/**
 * Main method for the program. Checks that all input files are present and readable and that the
 * output file can be written to. Then iterates through all the records accumulating metrics.
 * Finally writes the metrics file.
 */
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SamReader reader =
        SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);

    final Histogram<Integer> mismatchesHist = new Histogram<Integer>("Predicted", "Mismatches");
    final Histogram<Integer> totalHist = new Histogram<Integer>("Predicted", "Total_Bases");
    final Map<String, Histogram> mismatchesByTypeHist = new HashMap<String, Histogram>();
    final Map<String, Histogram> totalByTypeHist = new HashMap<String, Histogram>();

    // Set up the histograms
    byte[] bases = {'A', 'C', 'G', 'T'};
    for (final byte base : bases) {
        final Histogram<Integer> h = new Histogram<Integer>("Predicted", (char) base + ">");
        mismatchesByTypeHist.put((char) base + ">", h);
        final Histogram<Integer> h2 = new Histogram<Integer>("Predicted", ">" + (char) base);
        mismatchesByTypeHist.put(">" + (char) base, h2);
    }
    for (final byte base : bases) {
        final Histogram<Integer> h = new Histogram<Integer>("Predicted", (char) base + ">");
        totalByTypeHist.put((char) base + ">", h);
        final Histogram<Integer> h2 = new Histogram<Integer>("Predicted", ">" + (char) base);
        totalByTypeHist.put(">" + (char) base, h2);
    }

    for (final SAMRecord record : reader) {
        // Ignore these as we don't know the truth
        if (record.getReadUnmappedFlag() || record.isSecondaryOrSupplementary()) {
            continue;
        }
        final byte[] readBases = record.getReadBases();
        final byte[] readQualities = record.getBaseQualities();
        final byte[] refBases = SequenceUtil.makeReferenceFromAlignment(record, false);

        // We've seen stranger things
        if (readQualities.length != readBases.length) {
            throw new PicardException(
                "Missing Qualities ("
                    + readQualities.length
                    + ","
                    + readBases.length
                    + ") : "
                    + record.getSAMString());
        }
        if (refBases.length != readBases.length) {
            throw new PicardException(
                "The read length did not match the inferred reference length, please check your MD and CIGAR.");
        }

        int cycleIndex; // zero-based
        if (record.getReadNegativeStrandFlag()) {
            cycleIndex = readBases.length - 1 + CYCLE_OFFSET;
        } else {
            cycleIndex = CYCLE_OFFSET;
        }

        for (int i = 0; i < readBases.length; i++) {
            if (-1 == CYCLE || cycleIndex == CYCLE) {
                if ('-' != refBases[i] && '0' != refBases[i]) { // not insertion and not soft-clipped
                    if (!SequenceUtil.basesEqual(readBases[i], refBases[i])) { // mismatch
                        mismatchesHist.increment((int) readQualities[i]);
                        if (SequenceUtil.isValidBase(refBases[i])) {
                            mismatchesByTypeHist
                                .get((char) refBases[i] + ">")
                                .increment((int) readQualities[i]);
                        }
                        if (SequenceUtil.isValidBase(readBases[i])) {
                            mismatchesByTypeHist
                                .get(">" + (char) readBases[i])
                                .increment((int) readQualities[i]);
                        }
                    } else {
                        mismatchesHist.increment(
                            (int) readQualities[i], 0); // to make sure the bin will exist
                    }
                    totalHist.increment((int) readQualities[i]);
                    if (SequenceUtil.isValidBase(readBases[i])) {
                        totalByTypeHist.get(">" + (char) readBases[i]).increment((int) readQualities[i]);
                    }
                    if (SequenceUtil.isValidBase(refBases[i])) {
                        totalByTypeHist.get((char) refBases[i] + ">").increment((int) readQualities[i]);
                    }
                }
            }
            cycleIndex += record.getReadNegativeStrandFlag() ? -1 : 1;
        }
    }
    CloserUtil.close(reader);

    final Histogram<Integer> hist = new Histogram<Integer>("Predicted", "Observed");
    double sumOfSquaresError = 0.0;

    // compute the aggregate phred values
    for (final Integer key : mismatchesHist.keySet()) {
        final double numMismatches = mismatchesHist.get(key).getValue();
        final double numBases = totalHist.get(key).getValue();
        final double phredErr = Math.log10(numMismatches / numBases) * -10.0;
        sumOfSquaresError += (0 == numMismatches) ? 0.0 : (key - phredErr) * (key - phredErr);
        hist.increment(key, phredErr);

        // make sure the bin will exist
        for (final byte base : bases) {
            mismatchesByTypeHist.get(">" + (char) base).increment(key, 0.0);
            mismatchesByTypeHist.get((char) base + ">").increment(key, 0.0);
            totalByTypeHist.get(">" + (char) base).increment(key, 0.0);
            totalByTypeHist.get((char) base + ">").increment(key, 0.0);
        }
    }

    final QualityScoreAccuracyMetrics metrics = new QualityScoreAccuracyMetrics();
    metrics.SUM_OF_SQUARE_ERROR = sumOfSquaresError;

    final MetricsFile<QualityScoreAccuracyMetrics, Integer> out = getMetricsFile();
    out.addMetric(metrics);
    out.addHistogram(hist);
    for (final byte base : bases) {
        // >base : histograms for mismatches *to* the given base
        Histogram<Integer> m = mismatchesByTypeHist.get(">" + (char) base);
        Histogram<Integer> t = totalByTypeHist.get(">" + (char) base);
        Histogram<Integer> h = new Histogram<Integer>(m.getBinLabel(), m.getValueLabel());
        for (final Integer key : m.keySet()) {
            final double numMismatches = m.get(key).getValue();
            final double numBases = t.get(key).getValue();
            final double phredErr = Math.log10(numMismatches / numBases) * -10.0;
            h.increment(key, phredErr);
        }
        out.addHistogram(h);

        // base> : histograms for mismatches *from* the given base
        m = mismatchesByTypeHist.get((char) base + ">");
        // use the matching "base>" totals as the denominator (the original used ">" + base,
        // which pairs the "from" mismatches with the "to" totals)
        t = totalByTypeHist.get((char) base + ">");
        h = new Histogram<Integer>(m.getBinLabel(), m.getValueLabel());
        for (final Integer key : m.keySet()) {
            final double numMismatches = m.get(key).getValue();
            final double numBases = t.get(key).getValue();
            final double phredErr = Math.log10(numMismatches / numBases) * -10.0;
            h.increment(key, phredErr);
        }
        out.addHistogram(h);
    }

    out.addHistogram(mismatchesHist);
    out.addHistogram(totalHist);
    out.write(OUTPUT);
    return 0;
}
@Override
public int doWork(String[] args) {
    String region = null;
    com.github.lindenb.jvarkit.util.cli.GetOpt opt = new com.github.lindenb.jvarkit.util.cli.GetOpt();
    int c;
    while ((c = opt.getopt(args, getGetOptDefault() + "r:R:")) != -1) {
        switch (c) {
            case 'r':
                region = opt.getOptArg();
                break;
            case 'R':
                this.referenceFile = new File(opt.getOptArg());
                break;
            default:
                {
                    switch (handleOtherOptions(c, opt, args)) {
                        case EXIT_FAILURE:
                            return -1;
                        case EXIT_SUCCESS:
                            return 0;
                        default:
                            break;
                    }
                }
        }
    }

    File faidx = null;
    File bamFile = null;
    if (opt.getOptInd() + 1 == args.length) {
        bamFile = new File(args[opt.getOptInd()]);
    } else if (opt.getOptInd() + 2 == args.length) {
        bamFile = new File(args[opt.getOptInd()]);
        faidx = new File(args[opt.getOptInd() + 1]);
    } else {
        System.err.println("Illegal number of arguments.");
        return -1;
    }

    IndexedFastaSequenceFile ref = null;
    PrintWriter out = new PrintWriter(System.out);
    SamReader samReader = null;
    try {
        info("opening " + bamFile);
        SamReaderFactory srf =
            SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
        samReader = srf.open(bamFile);
        Interval interval =
            IntervalUtils.parseOne(samReader.getFileHeader().getSequenceDictionary(), region);
        if (interval == null) {
            // report the user-supplied region string rather than the null interval
            error("Bad interval " + region);
            return -1;
        }
        if (faidx != null) {
            ref = new IndexedFastaSequenceFile(faidx);
        }
        TViewHandler handler = new AsciiHandler();
        new TView().execute(samReader, ref, interval, handler);
    } catch (Exception e) {
        e.printStackTrace();
        return -1;
    } finally {
        out.flush();
        CloserUtil.close(samReader);
        CloserUtil.close(ref);
    }
    return 0;
}
/** Combines multiple SAM/BAM files into one. */
@Override
protected int doWork() {
    boolean matchedSortOrders = true;

    // read interval list if it is defined
    final List<Interval> intervalList =
        (INTERVALS == null ? null : IntervalList.fromFile(INTERVALS).uniqued().getIntervals());

    // map reader->iterator used if INTERVALS is defined
    final Map<SamReader, CloseableIterator<SAMRecord>> samReaderToIterator =
        new HashMap<SamReader, CloseableIterator<SAMRecord>>(INPUT.size());

    // Open the files for reading and writing
    final List<SamReader> readers = new ArrayList<SamReader>();
    final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
    {
        SAMSequenceDictionary dict = null; // Used to try and reduce redundant SDs in memory
        for (final File inFile : INPUT) {
            IOUtil.assertFileIsReadable(inFile);
            final SamReader in =
                SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
            if (INTERVALS != null) {
                if (!in.hasIndex())
                    throw new PicardException(
                        "Merging with interval but Bam file is not indexed " + inFile);
                final CloseableIterator<SAMRecord> samIterator =
                    new SamRecordIntervalIteratorFactory()
                        .makeSamRecordIntervalIterator(in, intervalList, true);
                samReaderToIterator.put(in, samIterator);
            }

            readers.add(in);
            headers.add(in.getFileHeader());

            // A slightly hackish attempt to keep memory consumption down when merging multiple
            // files with large sequence dictionaries (10,000s of sequences). If the dictionaries
            // are identical, then replace the duplicate copies with a single dictionary to reduce
            // the memory footprint.
            if (dict == null) {
                dict = in.getFileHeader().getSequenceDictionary();
            } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
                in.getFileHeader().setSequenceDictionary(dict);
            }

            matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
        }
    }

    // If all the input sort orders match the output sort order then just merge them and
    // write on the fly, otherwise setup to merge and sort before writing out the final file
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean presorted;
    final SAMFileHeader.SortOrder headerMergerSortOrder;
    final boolean mergingSamRecordIteratorAssumeSorted;
    if (matchedSortOrders
        || SORT_ORDER == SAMFileHeader.SortOrder.unsorted
        || ASSUME_SORTED
        || INTERVALS != null) {
        log.info("Input files are in same order as output so sorting to temp directory is not needed.");
        headerMergerSortOrder = SORT_ORDER;
        mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
        presorted = true;
    } else {
        log.info("Sorting input files using temp directory " + TMP_DIR);
        headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
        mergingSamRecordIteratorAssumeSorted = false;
        presorted = false;
    }

    final SamFileHeaderMerger headerMerger =
        new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
    final MergingSamRecordIterator iterator;
    // no interval defined, get an iterator for the whole bam
    if (intervalList == null) {
        iterator =
            new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
    } else {
        // show warning related to https://github.com/broadinstitute/picard/pull/314/files
        log.info("Warning: merged bams from different interval lists may contain the same read in both files");
        iterator = new MergingSamRecordIterator(headerMerger, samReaderToIterator, true);
    }
    final SAMFileHeader header = headerMerger.getMergedHeader();
    for (final String comment : COMMENT) {
        header.addComment(comment);
    }
    header.setSortOrder(SORT_ORDER);

    final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
    if (USE_THREADING) {
        samFileWriterFactory.setUseAsyncIo(true);
    }
    final SAMFileWriter out = samFileWriterFactory.makeSAMOrBAMWriter(header, presorted, OUTPUT);

    // Lastly loop through and write out the records
    final ProgressLogger progress = new ProgressLogger(log, PROGRESS_INTERVAL);
    while (iterator.hasNext()) {
        final SAMRecord record = iterator.next();
        out.addAlignment(record);
        progress.record(record);
    }

    log.info("Finished reading inputs.");
    for (final CloseableIterator<SAMRecord> iter : samReaderToIterator.values()) CloserUtil.close(iter);
    CloserUtil.close(readers);
    out.close();
    return 0;
}
/**
 * Test that PG header records are created & chained appropriately (or not created), and that the
 * PG record chains are as expected. MarkDuplicates is used both to merge and to mark dupes in
 * this case.
 *
 * @param suppressPg If true, do not create PG header record.
 * @param expectedPnVnByReadName For each read, info about the expected chain of PG records.
 */
@Test(dataProvider = "pgRecordChainingTest")
public void pgRecordChainingTest(
    final boolean suppressPg, final Map<String, List<ExpectedPnAndVn>> expectedPnVnByReadName) {
    final File outputDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp");
    outputDir.deleteOnExit();
    try {
        // Run MarkDuplicates, merging the 3 input files, and either enabling or suppressing PG
        // header record creation according to suppressPg.
        final MarkDuplicates markDuplicates = new MarkDuplicates();
        final ArrayList<String> args = new ArrayList<String>();
        for (int i = 1; i <= 3; ++i) {
            args.add("INPUT=" + new File(TEST_DATA_DIR, "merge" + i + ".sam").getAbsolutePath());
        }
        final File outputSam = new File(outputDir, TEST_BASE_NAME + ".sam");
        args.add("OUTPUT=" + outputSam.getAbsolutePath());
        args.add(
            "METRICS_FILE="
                + new File(outputDir, TEST_BASE_NAME + ".duplicate_metrics").getAbsolutePath());
        if (suppressPg) args.add("PROGRAM_RECORD_ID=null");

        // I generally prefer to call doWork rather than invoking the argument parser, but it is
        // necessary in this case to initialize the command line.
        // Note that for the unit test, version won't come through because it is obtained through
        // the jar manifest, and the unit test doesn't run code from a jar.
        Assert.assertEquals(markDuplicates.instanceMain(args.toArray(new String[args.size()])), 0);

        // Read the MarkDuplicates output file, and get the PG ID for each read. In this
        // particular test, the PG ID should be the same for both ends of a pair.
        final SamReader reader = SamReaderFactory.makeDefault().open(outputSam);
        final Map<String, String> pgIdForReadName = new HashMap<String, String>();
        for (final SAMRecord rec : reader) {
            final String existingPgId = pgIdForReadName.get(rec.getReadName());
            final String thisPgId = rec.getStringAttribute(SAMTag.PG.name());
            if (existingPgId != null) {
                Assert.assertEquals(thisPgId, existingPgId);
            } else {
                pgIdForReadName.put(rec.getReadName(), thisPgId);
            }
        }
        final SAMFileHeader header = reader.getFileHeader();
        CloserUtil.close(reader);

        // Confirm that for each read name, the chain of PG records contains exactly the number
        // that is expected, and that values in the PG chain are as expected.
        for (final Map.Entry<String, List<ExpectedPnAndVn>> entry : expectedPnVnByReadName.entrySet()) {
            final String readName = entry.getKey();
            final List<ExpectedPnAndVn> expectedList = entry.getValue();
            String pgId = pgIdForReadName.get(readName);
            for (final ExpectedPnAndVn expected : expectedList) {
                final SAMProgramRecord programRecord = header.getProgramRecord(pgId);
                if (expected.expectedPn != null)
                    Assert.assertEquals(programRecord.getProgramName(), expected.expectedPn);
                if (expected.expectedVn != null)
                    Assert.assertEquals(programRecord.getProgramVersion(), expected.expectedVn);
                pgId = programRecord.getPreviousProgramGroupId();
            }
            Assert.assertNull(pgId);
        }
    } finally {
        TestUtil.recursiveDelete(outputDir);
    }
}
private BAMProcessorImplDict(File in) {
    reader = SamReaderFactory.makeDefault().open(in);
    iterator = reader.iterator();
    queue = new LinkedList<SAMRecord>();
}
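The snippets above all share the same basic shape; a minimal, self-contained sketch of that pattern with htsjdk follows (the input path is a placeholder, not taken from any snippet above):

// Minimal sketch of the common pattern: configure a SamReaderFactory, open an
// input, read the header, stream the records, and close the reader.
// "input.bam" is a placeholder path.
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import java.io.File;

public class ReadBamExample {
    public static void main(String[] args) throws Exception {
        final SamReaderFactory factory =
            SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
        try (SamReader reader = factory.open(new File("input.bam"))) {
            final SAMFileHeader header = reader.getFileHeader();
            System.err.println("Sort order: " + header.getSortOrder());
            long n = 0;
            for (final SAMRecord rec : reader) {
                n++; // records stream in file order; process each rec here
            }
            System.err.println("Read " + n + " records.");
        }
    }
}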