/**
 * Checks read-group splitting on a pileup whose reads have no RG attribute set: requesting the
 * {@code null} read group must yield all three reads in their original order, while requesting
 * the read group "rg1", which no read was assigned to, must yield {@code null}.
 */
@Test
public void testSplitByNullReadGroups() {
  final SAMFileHeader samHeader = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);

  // No RG attribute is set on any of these reads.
  final GATKSAMRecord first =
      ArtificialSAMUtils.createArtificialRead(samHeader, "read1", 0, 1, 10);
  final GATKSAMRecord second =
      ArtificialSAMUtils.createArtificialRead(samHeader, "read2", 0, 1, 10);
  final GATKSAMRecord third =
      ArtificialSAMUtils.createArtificialRead(samHeader, "read3", 0, 1, 10);

  final ReadBackedPileup fullPileup =
      new ReadBackedPileupImpl(
          null, Arrays.asList(first, second, third), Arrays.asList(1, 1, 1));

  final ReadBackedPileup unassignedPileup = fullPileup.getPileupForReadGroup(null);
  final List<GATKSAMRecord> unassignedReads = unassignedPileup.getReads();
  Assert.assertEquals(
      unassignedPileup.getNumberOfElements(), 3, "Wrong number of reads in null read group");

  // Every read must come back, at the same index it was supplied at.
  final GATKSAMRecord[] expectedReads = {first, second, third};
  for (int idx = 0; idx < expectedReads.length; idx++) {
    final GATKSAMRecord expected = expectedReads[idx];
    Assert.assertEquals(
        unassignedReads.get(idx),
        expected,
        "Read " + expected.getReadName() + " should be in null rg but isn't");
  }

  final ReadBackedPileup unknownGroupPileup = fullPileup.getPileupForReadGroup("rg1");
  Assert.assertNull(
      unknownGroupPileup, "Pileup for non-existent read group should return null");
}
/** Ensure that splitting read groups still works when dealing with a sample-split pileup. */ @Test public void testSplitBySample() { SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1"); readGroupOne.setSample("sample1"); SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2"); readGroupTwo.setSample("sample2"); SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); header.addReadGroup(readGroupOne); header.addReadGroup(readGroupTwo); GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, 10); read1.setAttribute("RG", readGroupOne.getId()); GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 1, 10); read2.setAttribute("RG", readGroupTwo.getId()); GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header, "read3", 0, 1, 10); read3.setAttribute("RG", readGroupOne.getId()); GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header, "read4", 0, 1, 10); read4.setAttribute("RG", readGroupTwo.getId()); ReadBackedPileupImpl sample1Pileup = new ReadBackedPileupImpl(null, Arrays.asList(read1, read3), Arrays.asList(1, 1)); ReadBackedPileupImpl sample2Pileup = new ReadBackedPileupImpl(null, Arrays.asList(read2, read4), Arrays.asList(1, 1)); Map<String, ReadBackedPileupImpl> sampleToPileupMap = new HashMap<String, ReadBackedPileupImpl>(); sampleToPileupMap.put(readGroupOne.getSample(), sample1Pileup); sampleToPileupMap.put(readGroupTwo.getSample(), sample2Pileup); ReadBackedPileup compositePileup = new ReadBackedPileupImpl(null, sampleToPileupMap); ReadBackedPileup rg1Pileup = compositePileup.getPileupForReadGroup("rg1"); List<GATKSAMRecord> rg1Reads = rg1Pileup.getReads(); Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg1"); Assert.assertEquals( rg1Reads.get(0), read1, "Read " + read1.getReadName() + " should be in rg1 but isn't"); Assert.assertEquals( rg1Reads.get(1), read3, "Read " + read3.getReadName() + " should be 
in rg1 but isn't"); ReadBackedPileup rg2Pileup = compositePileup.getPileupForReadGroup("rg2"); List<GATKSAMRecord> rg2Reads = rg2Pileup.getReads(); Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg2"); Assert.assertEquals( rg2Reads.get(0), read2, "Read " + read2.getReadName() + " should be in rg2 but isn't"); Assert.assertEquals( rg2Reads.get(1), read4, "Read " + read4.getReadName() + " should be in rg2 but isn't"); }
/**
 * Exercises {@code IndelRealigner.realignmentProducesBadAlignment} with a read created at
 * {@code contigEnd - 1} on the first sequence of the shared header: with cigar "2M" the result
 * is expected to be {@code false}, and with cigar "1M1D1M" it is expected to be {@code true}.
 */
@Test
public void realignAtContigBorderTest() {
  final int contigEnd = header.getSequence(0).getSequenceLength();
  final int readStart = contigEnd - 1;
  final GATKSAMRecord borderRead =
      ArtificialSAMUtils.createArtificialRead(header, "goodRead", 0, readStart, 2);

  // Plain match cigar: expected to be reported as an acceptable alignment.
  borderRead.setCigarString("2M");
  final boolean badWithMatchOnly =
      IndelRealigner.realignmentProducesBadAlignment(borderRead, contigEnd);
  Assert.assertFalse(badWithMatchOnly);

  // Same read with a deletion in the middle: expected to be reported as a bad alignment.
  borderRead.setCigarString("1M1D1M");
  final boolean badWithDeletion =
      IndelRealigner.realignmentProducesBadAlignment(borderRead, contigEnd);
  Assert.assertTrue(badWithDeletion);
}
/**
 * Builds a composite pileup from two single-read, per-sample pileups and checks that
 * {@code getPileupForSample} returns the sample2 read for "sample2" and {@code null} for sample
 * names that were never added.
 */
@Test
public void testGetPileupForSample() {
  final String firstSample = "sample1";
  final String secondSample = "sample2";

  final SAMReadGroupRecord firstGroup = new SAMReadGroupRecord("rg1");
  firstGroup.setSample(firstSample);
  final SAMReadGroupRecord secondGroup = new SAMReadGroupRecord("rg2");
  secondGroup.setSample(secondSample);

  final SAMFileHeader samHeader = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
  samHeader.addReadGroup(firstGroup);
  samHeader.addReadGroup(secondGroup);

  final GATKSAMRecord firstRead =
      ArtificialSAMUtils.createArtificialRead(samHeader, "read1", 0, 1, 10);
  firstRead.setAttribute("RG", firstGroup.getId());
  final GATKSAMRecord secondRead =
      ArtificialSAMUtils.createArtificialRead(samHeader, "read2", 0, 1, 10);
  secondRead.setAttribute("RG", secondGroup.getId());

  // One single-read pileup per sample, keyed by sample name.
  final ReadBackedPileupImpl firstSamplePileup =
      new ReadBackedPileupImpl(null, Collections.singletonList(firstRead), 0);
  final ReadBackedPileupImpl secondSamplePileup =
      new ReadBackedPileupImpl(null, Collections.singletonList(secondRead), 0);
  final Map<String, ReadBackedPileupImpl> pileupsBySample =
      new HashMap<String, ReadBackedPileupImpl>();
  pileupsBySample.put(firstSample, firstSamplePileup);
  pileupsBySample.put(secondSample, secondSamplePileup);

  final ReadBackedPileup composite = new ReadBackedPileupImpl(null, pileupsBySample);

  final ReadBackedPileup extracted = composite.getPileupForSample(secondSample);
  Assert.assertEquals(
      extracted.getNumberOfElements(), 1, "Sample 2 pileup has wrong number of elements");
  Assert.assertEquals(
      extracted.getReads().get(0), secondRead, "Sample 2 pileup has incorrect read");

  // Sample names that were never registered must not produce a pileup.
  final ReadBackedPileup forMissing = composite.getPileupForSample("missing");
  Assert.assertNull(forMissing, "Pileup for sample 'missing' should be null but isn't");

  final ReadBackedPileup forNotHere = composite.getPileupForSample("not here");
  Assert.assertNull(forNotHere, "Pileup for sample 'not here' should be null but isn't");
}
@Test(dataProvider = "DanglingHeads") public void testDanglingHeads( final String ref, final String alt, final String cigar, final boolean shouldBeMerged) { final int kmerSize = 5; // create the graph and populate it final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize); rtgraph.addSequence("ref", ref.getBytes(), true); final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead( alt.getBytes(), Utils.dupBytes((byte) 30, alt.length()), alt.length() + "M"); rtgraph.addRead(read); rtgraph.setMaxMismatchesInDanglingHead(10); rtgraph.buildGraphIfNecessary(); // confirm that we have just a single dangling head MultiDeBruijnVertex altSource = null; for (final MultiDeBruijnVertex v : rtgraph.vertexSet()) { if (rtgraph.isSource(v) && !rtgraph.isReferenceNode(v)) { Assert.assertTrue(altSource == null, "We found more than one non-reference source"); altSource = v; } } Assert.assertTrue(altSource != null, "We did not find a non-reference source"); // confirm that the SW alignment agrees with our expectations final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstUpwardsReferencePath(altSource, 0, 1); if (result == null) { Assert.assertFalse(shouldBeMerged); return; } Assert.assertTrue( cigar.equals(result.cigar.toString()), "SW generated cigar = " + result.cigar.toString()); // confirm that the tail merging works as expected final int mergeResult = rtgraph.mergeDanglingHead(result); Assert.assertTrue(mergeResult > 0 || !shouldBeMerged); // confirm that we created the appropriate bubble in the graph only if expected rtgraph.cleanNonRefPaths(); final SeqGraph seqGraph = rtgraph.convertToSequenceGraph(); final List<KBestHaplotype> paths = new KBestHaplotypeFinder( seqGraph, seqGraph.getReferenceSourceVertex(), seqGraph.getReferenceSinkVertex()); Assert.assertEquals(paths.size(), shouldBeMerged ? 2 : 1); }
/**
 * Verifies plain read-group splitting: seven reads ("read1".."read7") are tagged with either
 * rg1 or rg2, placed in one pileup, and the pileup for each read group must contain exactly
 * that group's reads in their original relative order.
 */
@Test
public void testSplitByReadGroup() {
  final SAMReadGroupRecord rgOne = new SAMReadGroupRecord("rg1");
  final SAMReadGroupRecord rgTwo = new SAMReadGroupRecord("rg2");

  final SAMFileHeader samHeader = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
  samHeader.addReadGroup(rgOne);
  samHeader.addReadGroup(rgTwo);

  // Read group of read1..read7, in order: rg1 gets reads 1, 3, 6, 7; rg2 gets reads 2, 4, 5.
  final SAMReadGroupRecord[] owners = {rgOne, rgTwo, rgOne, rgTwo, rgTwo, rgOne, rgOne};
  final GATKSAMRecord[] reads = new GATKSAMRecord[owners.length];
  for (int idx = 0; idx < owners.length; idx++) {
    reads[idx] =
        ArtificialSAMUtils.createArtificialRead(samHeader, "read" + (idx + 1), 0, 1, 10);
    reads[idx].setAttribute("RG", owners[idx].getId());
  }

  final ReadBackedPileup fullPileup =
      new ReadBackedPileupImpl(
          null, Arrays.asList(reads), Arrays.asList(1, 1, 1, 1, 1, 1, 1));

  final ReadBackedPileup rgOnePileup = fullPileup.getPileupForReadGroup("rg1");
  final List<GATKSAMRecord> rgOneReads = rgOnePileup.getReads();
  Assert.assertEquals(rgOneReads.size(), 4, "Wrong number of reads in read group rg1");
  final GATKSAMRecord[] expectedRgOne = {reads[0], reads[2], reads[5], reads[6]};
  for (int idx = 0; idx < expectedRgOne.length; idx++) {
    final GATKSAMRecord expected = expectedRgOne[idx];
    Assert.assertEquals(
        rgOneReads.get(idx),
        expected,
        "Read " + expected.getReadName() + " should be in rg1 but isn't");
  }

  final ReadBackedPileup rgTwoPileup = fullPileup.getPileupForReadGroup("rg2");
  final List<GATKSAMRecord> rgTwoReads = rgTwoPileup.getReads();
  Assert.assertEquals(rgTwoReads.size(), 3, "Wrong number of reads in read group rg2");
  final GATKSAMRecord[] expectedRgTwo = {reads[1], reads[3], reads[4]};
  for (int idx = 0; idx < expectedRgTwo.length; idx++) {
    final GATKSAMRecord expected = expectedRgTwo[idx];
    Assert.assertEquals(
        rgTwoReads.get(idx),
        expected,
        "Read " + expected.getReadName() + " should be in rg2 but isn't");
  }
}
/**
 * Creates {@code n} pileup elements, each backed by its own 3-base artificial read named "read"
 * with bases all 'A', base qualities all 30 and the cigar {@code "1M1" + op + "1M"}.
 *
 * @param n number of pileup elements to create
 * @param mapq mapping quality assigned to every read
 * @param op cigar operator letter placed in the middle cigar element; the element's read offset
 *     is 1 when this is "M" and 0 otherwise
 * @return a new list holding the {@code n} pileup elements, each pointing at the middle cigar
 *     element of its read
 */
private List<PileupElement> makeReads(final int n, final int mapq, final String op) {
  final int readLength = 3;
  final String cigarString = "1M1" + op + "1M";
  final List<PileupElement> elements = new LinkedList<>();

  int remaining = n;
  while (remaining-- > 0) {
    final GATKSAMRecord record =
        ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, readLength);
    record.setReadBases(Utils.dupBytes((byte) 'A', readLength));
    record.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    record.setCigarString(cigarString);
    record.setMappingQuality(mapq);

    // Offset 1 is used only when the middle operator is a match.
    final int offsetInRead;
    if (op.equals("M")) {
      offsetInRead = 1;
    } else {
      offsetInRead = 0;
    }

    final CigarElement middleElement = record.getCigar().getCigarElement(1);
    elements.add(new PileupElement(record, offsetInRead, middleElement, 1, 0));
  }
  return elements;
}
@Test public void testRBPMappingQuals() { // create a read with high MQ final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, 10); read.setReadBases(Utils.dupBytes((byte) 'A', 10)); read.setBaseQualities(Utils.dupBytes((byte) 30, 10)); read.setCigarString("10M"); read.setMappingQuality(200); // set a MQ higher than max signed byte // now create the RBP final List<PileupElement> elts = new LinkedList<>(); elts.add(new PileupElement(read, 0, read.getCigar().getCigarElement(0), 0, 0)); final Map<String, ReadBackedPileupImpl> pileupsBySample = new HashMap<>(); pileupsBySample.put("foo", new ReadBackedPileupImpl(loc, elts)); final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, pileupsBySample); Assert.assertEquals(pileup.getMappingQuals()[0], 200); }
@Test(dataProvider = "DanglingTails") public void testDanglingTails( final String refEnd, final String altEnd, final String cigar, final boolean cigarIsGood, final int mergePointDistanceFromSink) { final int kmerSize = 15; // construct the haplotypes final String commonPrefix = "AAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTTT"; final String ref = commonPrefix + refEnd; final String alt = commonPrefix + altEnd; // create the graph and populate it final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize); rtgraph.addSequence("ref", ref.getBytes(), true); final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead( alt.getBytes(), Utils.dupBytes((byte) 30, alt.length()), alt.length() + "M"); rtgraph.addRead(read); rtgraph.buildGraphIfNecessary(); // confirm that we have just a single dangling tail MultiDeBruijnVertex altSink = null; for (final MultiDeBruijnVertex v : rtgraph.vertexSet()) { if (rtgraph.isSink(v) && !rtgraph.isReferenceNode(v)) { Assert.assertTrue(altSink == null, "We found more than one non-reference sink"); altSink = v; } } Assert.assertTrue(altSink != null, "We did not find a non-reference sink"); // confirm that the SW alignment agrees with our expectations final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstDownwardsReferencePath(altSink, 0, 4); if (result == null) { Assert.assertFalse(cigarIsGood); return; } Assert.assertTrue( cigar.equals(result.cigar.toString()), "SW generated cigar = " + result.cigar.toString()); // confirm that the goodness of the cigar agrees with our expectations Assert.assertEquals(rtgraph.cigarIsOkayToMerge(result.cigar, false, true), cigarIsGood); // confirm that the tail merging works as expected if (cigarIsGood) { final int mergeResult = rtgraph.mergeDanglingTail(result); Assert.assertTrue(mergeResult == 1 || mergePointDistanceFromSink == -1); // confirm that we created the appropriate edge if (mergePointDistanceFromSink >= 0) { MultiDeBruijnVertex v = altSink; for (int i = 
0; i < mergePointDistanceFromSink; i++) { if (rtgraph.inDegreeOf(v) != 1) Assert.fail("Encountered vertex with multiple edges"); v = rtgraph.getEdgeSource(rtgraph.incomingEdgeOf(v)); } Assert.assertTrue(rtgraph.outDegreeOf(v) > 1); } } }