/** Ensure that splitting read groups still works when dealing with a sample-split pileup. */ @Test public void testSplitBySample() { SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1"); readGroupOne.setSample("sample1"); SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2"); readGroupTwo.setSample("sample2"); SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); header.addReadGroup(readGroupOne); header.addReadGroup(readGroupTwo); GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, 10); read1.setAttribute("RG", readGroupOne.getId()); GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 1, 10); read2.setAttribute("RG", readGroupTwo.getId()); GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header, "read3", 0, 1, 10); read3.setAttribute("RG", readGroupOne.getId()); GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header, "read4", 0, 1, 10); read4.setAttribute("RG", readGroupTwo.getId()); ReadBackedPileupImpl sample1Pileup = new ReadBackedPileupImpl(null, Arrays.asList(read1, read3), Arrays.asList(1, 1)); ReadBackedPileupImpl sample2Pileup = new ReadBackedPileupImpl(null, Arrays.asList(read2, read4), Arrays.asList(1, 1)); Map<String, ReadBackedPileupImpl> sampleToPileupMap = new HashMap<String, ReadBackedPileupImpl>(); sampleToPileupMap.put(readGroupOne.getSample(), sample1Pileup); sampleToPileupMap.put(readGroupTwo.getSample(), sample2Pileup); ReadBackedPileup compositePileup = new ReadBackedPileupImpl(null, sampleToPileupMap); ReadBackedPileup rg1Pileup = compositePileup.getPileupForReadGroup("rg1"); List<GATKSAMRecord> rg1Reads = rg1Pileup.getReads(); Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg1"); Assert.assertEquals( rg1Reads.get(0), read1, "Read " + read1.getReadName() + " should be in rg1 but isn't"); Assert.assertEquals( rg1Reads.get(1), read3, "Read " + read3.getReadName() + " should be 
in rg1 but isn't"); ReadBackedPileup rg2Pileup = compositePileup.getPileupForReadGroup("rg2"); List<GATKSAMRecord> rg2Reads = rg2Pileup.getReads(); Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg2"); Assert.assertEquals( rg2Reads.get(0), read2, "Read " + read2.getReadName() + " should be in rg2 but isn't"); Assert.assertEquals( rg2Reads.get(1), read4, "Read " + read4.getReadName() + " should be in rg2 but isn't"); }
/**
 * Checks sample lookup on a composite pileup built from a sample-to-pileup map: the pileup
 * returned for "sample2" must contain exactly the one read placed there, and lookups for sample
 * names that were never registered must return {@code null}.
 */
@Test
public void testGetPileupForSample() {
    final String firstSample = "sample1";
    final String secondSample = "sample2";

    // One read group per sample, both registered in the same artificial header.
    final SAMReadGroupRecord firstGroup = new SAMReadGroupRecord("rg1");
    firstGroup.setSample(firstSample);
    final SAMReadGroupRecord secondGroup = new SAMReadGroupRecord("rg2");
    secondGroup.setSample(secondSample);

    final SAMFileHeader samHeader = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
    samHeader.addReadGroup(firstGroup);
    samHeader.addReadGroup(secondGroup);

    final GATKSAMRecord firstRead =
            ArtificialSAMUtils.createArtificialRead(samHeader, "read1", 0, 1, 10);
    firstRead.setAttribute("RG", firstGroup.getId());
    final GATKSAMRecord secondRead =
            ArtificialSAMUtils.createArtificialRead(samHeader, "read2", 0, 1, 10);
    secondRead.setAttribute("RG", secondGroup.getId());

    // Each sample gets a single-read pileup.
    final ReadBackedPileupImpl firstSamplePileup =
            new ReadBackedPileupImpl(null, Collections.singletonList(firstRead), 0);
    final ReadBackedPileupImpl secondSamplePileup =
            new ReadBackedPileupImpl(null, Collections.singletonList(secondRead), 0);

    final Map<String, ReadBackedPileupImpl> pileupsBySample =
            new HashMap<String, ReadBackedPileupImpl>();
    pileupsBySample.put(firstSample, firstSamplePileup);
    pileupsBySample.put(secondSample, secondSamplePileup);

    final ReadBackedPileup composite = new ReadBackedPileupImpl(null, pileupsBySample);

    final ReadBackedPileup fetched = composite.getPileupForSample(secondSample);
    Assert.assertEquals(
            fetched.getNumberOfElements(), 1, "Sample 2 pileup has wrong number of elements");
    Assert.assertEquals(
            fetched.getReads().get(0), secondRead, "Sample 2 pileup has incorrect read");

    // Sample names that were never added to the map.
    final String[] unknownSamples = {"missing", "not here"};
    for (final String unknown : unknownSamples) {
        final ReadBackedPileup absent = composite.getPileupForSample(unknown);
        Assert.assertNull(
                absent, "Pileup for sample '" + unknown + "' should be null but isn't");
    }
}
/**
 * Ensure that basic read group splitting works.
 *
 * <p>Seven reads are tagged with one of two read groups (rg1: reads 1, 3, 6, 7; rg2: reads 2, 4,
 * 5) and placed in a single pileup. The pileup is then queried per read group, and the returned
 * reads are checked for count and order.
 */
@Test
public void testSplitByReadGroup() {
    final SAMReadGroupRecord firstGroup = new SAMReadGroupRecord("rg1");
    final SAMReadGroupRecord secondGroup = new SAMReadGroupRecord("rg2");

    final SAMFileHeader samHeader = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
    samHeader.addReadGroup(firstGroup);
    samHeader.addReadGroup(secondGroup);

    final GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(samHeader, "read1", 0, 1, 10);
    final GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(samHeader, "read2", 0, 1, 10);
    final GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(samHeader, "read3", 0, 1, 10);
    final GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(samHeader, "read4", 0, 1, 10);
    final GATKSAMRecord read5 = ArtificialSAMUtils.createArtificialRead(samHeader, "read5", 0, 1, 10);
    final GATKSAMRecord read6 = ArtificialSAMUtils.createArtificialRead(samHeader, "read6", 0, 1, 10);
    final GATKSAMRecord read7 = ArtificialSAMUtils.createArtificialRead(samHeader, "read7", 0, 1, 10);

    // Expected membership of each read group, in pileup order.
    final List<GATKSAMRecord> expectedRg1 = Arrays.asList(read1, read3, read6, read7);
    final List<GATKSAMRecord> expectedRg2 = Arrays.asList(read2, read4, read5);

    for (final GATKSAMRecord read : expectedRg1) {
        read.setAttribute("RG", firstGroup.getId());
    }
    for (final GATKSAMRecord read : expectedRg2) {
        read.setAttribute("RG", secondGroup.getId());
    }

    final ReadBackedPileup allReads =
            new ReadBackedPileupImpl(
                    null,
                    Arrays.asList(read1, read2, read3, read4, read5, read6, read7),
                    Arrays.asList(1, 1, 1, 1, 1, 1, 1));

    final List<GATKSAMRecord> actualRg1 = allReads.getPileupForReadGroup("rg1").getReads();
    Assert.assertEquals(actualRg1.size(), 4, "Wrong number of reads in read group rg1");
    for (int i = 0; i < expectedRg1.size(); i++) {
        final GATKSAMRecord expected = expectedRg1.get(i);
        Assert.assertEquals(
                actualRg1.get(i),
                expected,
                "Read " + expected.getReadName() + " should be in rg1 but isn't");
    }

    final List<GATKSAMRecord> actualRg2 = allReads.getPileupForReadGroup("rg2").getReads();
    Assert.assertEquals(actualRg2.size(), 3, "Wrong number of reads in read group rg2");
    for (int i = 0; i < expectedRg2.size(); i++) {
        final GATKSAMRecord expected = expectedRg2.get(i);
        Assert.assertEquals(
                actualRg2.get(i),
                expected,
                "Read " + expected.getReadName() + " should be in rg2 but isn't");
    }
}
/**
 * Returns the identifier to use for a read group: its platform unit when one is set, otherwise
 * its read group ID.
 *
 * @param rg the read group record to identify
 * @return the platform unit if it is non-null, else the value of {@code rg.getId()}
 */
public static String getID(final SAMReadGroupRecord rg) {
    final String platformUnit = rg.getPlatformUnit();
    if (platformUnit != null) {
        return platformUnit;
    }
    return rg.getId();
}
@Override public void onTraversalDone(Integer sum) { final GATKReport report = new GATKReport(); report.addTable(TABLE_NAME, "Table of read group properties", 12); GATKReportTable table = report.getTable(TABLE_NAME); DateFormat dateFormatter = DateFormat.getDateInstance(DateFormat.SHORT); table.addColumn("readgroup"); // * Emits a GATKReport containing read group, sample, library, platform, center, median insert // size and // * median read length for each read group in every BAM file. table.addColumn("sample", "%s"); table.addColumn("library", "%s"); table.addColumn("platform", "%s"); table.addColumn("center", "%s"); table.addColumn("date", "%s"); table.addColumn("has.any.reads"); table.addColumn("is.paired.end"); table.addColumn("n.reads.analyzed", "%d"); table.addColumn("simple.read.type", "%s"); table.addColumn("median.read.length"); table.addColumn("median.insert.size"); for (final SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups()) { final String rgID = rg.getId(); table.addRowID(rgID, true); PerReadGroupInfo info = readGroupInfo.get(rgID); // we are paired if > 25% of reads are paired final boolean isPaired = info.nReadsPaired / (1.0 * (info.nReadsSeen + 1)) > 0.25; final boolean hasAnyReads = info.nReadsSeen > 0; final int readLength = info.readLength.getMedian(0); setTableValue(table, rgID, "sample", rg.getSample()); setTableValue(table, rgID, "library", rg.getLibrary()); setTableValue(table, rgID, "platform", rg.getPlatform()); setTableValue(table, rgID, "center", rg.getSequencingCenter()); try { setTableValue( table, rgID, "date", rg.getRunDate() != null ? 
dateFormatter.format(rg.getRunDate()) : "NA"); } catch (NullPointerException e) { // TODO: remove me when bug in Picard is fixed that causes NPE when date isn't present setTableValue(table, rgID, "date", "NA"); } setTableValue(table, rgID, "has.any.reads", hasAnyReads); setTableValue(table, rgID, "is.paired.end", isPaired); setTableValue(table, rgID, "n.reads.analyzed", info.nReadsSeen); setTableValue( table, rgID, "simple.read.type", hasAnyReads ? String.format("%dx%d", isPaired ? 2 : 1, readLength) : "NA"); setTableValue(table, rgID, "median.read.length", hasAnyReads ? readLength : "NA"); setTableValue( table, rgID, "median.insert.size", hasAnyReads && isPaired ? info.insertSize.getMedian(0) : "NA"); } report.print(out); }
/**
 * Registers a fresh {@code PerReadGroupInfo} in {@code readGroupInfo} for every read group listed
 * in the toolkit's SAM file header, keyed by the read group's ID.
 */
@Override
public void initialize() {
    for (final SAMReadGroupRecord readGroup : getToolkit().getSAMFileHeader().getReadGroups()) {
        final String id = readGroup.getId();
        readGroupInfo.put(id, new PerReadGroupInfo());
    }
}