/**
 * Verifies read-group splitting when the reads have no RG attribute set by this test: requesting
 * the {@code null} read group must yield all three reads in their original order, and requesting
 * a named read group that none of the reads belongs to must yield {@code null}.
 */
  @Test
  public void testSplitByNullReadGroups() {
    final SAMFileHeader samHeader = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);

    // No "RG" attribute is assigned to any of these reads.
    final GATKSAMRecord first = ArtificialSAMUtils.createArtificialRead(samHeader, "read1", 0, 1, 10);
    final GATKSAMRecord second =
        ArtificialSAMUtils.createArtificialRead(samHeader, "read2", 0, 1, 10);
    final GATKSAMRecord third = ArtificialSAMUtils.createArtificialRead(samHeader, "read3", 0, 1, 10);

    final ReadBackedPileup allReads =
        new ReadBackedPileupImpl(null, Arrays.asList(first, second, third), Arrays.asList(1, 1, 1));

    // Every read should come back for the null read group, in insertion order.
    final ReadBackedPileup withoutGroup = allReads.getPileupForReadGroup(null);
    final List<GATKSAMRecord> actualReads = withoutGroup.getReads();
    Assert.assertEquals(
        withoutGroup.getNumberOfElements(), 3, "Wrong number of reads in null read group");

    final List<GATKSAMRecord> expectedReads = Arrays.asList(first, second, third);
    for (int idx = 0; idx < expectedReads.size(); idx++) {
      final GATKSAMRecord expected = expectedReads.get(idx);
      Assert.assertEquals(
          actualReads.get(idx),
          expected,
          "Read " + expected.getReadName() + " should be in null rg but isn't");
    }

    // A read group nobody belongs to has no pileup at all.
    Assert.assertNull(
        allReads.getPileupForReadGroup("rg1"),
        "Pileup for non-existent read group should return null");
  }
  /**
   * Ensure that splitting read groups still works when dealing with a sample-split pileup.
   *
   * <p>Two per-sample pileups are built (sample1 holds the rg1 reads, sample2 holds the rg2 reads)
   * and combined into one composite pileup. The pileup returned for each read group must then
   * contain exactly that group's two reads, in the order they were added.
   */
  @Test
  public void testSplitBySample() {
    SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
    readGroupOne.setSample("sample1");
    SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2");
    readGroupTwo.setSample("sample2");

    SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
    header.addReadGroup(readGroupOne);
    header.addReadGroup(readGroupTwo);

    // Reads 1 and 3 are tagged rg1; reads 2 and 4 are tagged rg2.
    GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, 10);
    read1.setAttribute("RG", readGroupOne.getId());
    GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 1, 10);
    read2.setAttribute("RG", readGroupTwo.getId());
    GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header, "read3", 0, 1, 10);
    read3.setAttribute("RG", readGroupOne.getId());
    GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header, "read4", 0, 1, 10);
    read4.setAttribute("RG", readGroupTwo.getId());

    // One pileup per sample, keyed by sample name, wrapped into a composite pileup.
    ReadBackedPileupImpl sample1Pileup =
        new ReadBackedPileupImpl(null, Arrays.asList(read1, read3), Arrays.asList(1, 1));
    ReadBackedPileupImpl sample2Pileup =
        new ReadBackedPileupImpl(null, Arrays.asList(read2, read4), Arrays.asList(1, 1));
    Map<String, ReadBackedPileupImpl> sampleToPileupMap =
        new HashMap<String, ReadBackedPileupImpl>();
    sampleToPileupMap.put(readGroupOne.getSample(), sample1Pileup);
    sampleToPileupMap.put(readGroupTwo.getSample(), sample2Pileup);

    ReadBackedPileup compositePileup = new ReadBackedPileupImpl(null, sampleToPileupMap);

    ReadBackedPileup rg1Pileup = compositePileup.getPileupForReadGroup("rg1");
    List<GATKSAMRecord> rg1Reads = rg1Pileup.getReads();

    Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg1");
    Assert.assertEquals(
        rg1Reads.get(0), read1, "Read " + read1.getReadName() + " should be in rg1 but isn't");
    Assert.assertEquals(
        rg1Reads.get(1), read3, "Read " + read3.getReadName() + " should be in rg1 but isn't");

    ReadBackedPileup rg2Pileup = compositePileup.getPileupForReadGroup("rg2");
    List<GATKSAMRecord> rg2Reads = rg2Pileup.getReads();

    // Check the rg2 list here; the size check previously re-tested rg1Reads by mistake.
    Assert.assertEquals(rg2Reads.size(), 2, "Wrong number of reads in read group rg2");
    Assert.assertEquals(
        rg2Reads.get(0), read2, "Read " + read2.getReadName() + " should be in rg2 but isn't");
    Assert.assertEquals(
        rg2Reads.get(1), read4, "Read " + read4.getReadName() + " should be in rg2 but isn't");
  }
Ejemplo n.º 3
0
 /**
  * Checks {@code IndelRealigner.realignmentProducesBadAlignment} for a two-base read placed at
  * {@code contigEnd - 1} on the first contig: a "2M" cigar must be reported as acceptable, while
  * "1M1D1M" must be reported as a bad alignment.
  */
 @Test
 public void realignAtContigBorderTest() {
   final int contigEnd = header.getSequence(0).getSequenceLength();
   final GATKSAMRecord borderRead =
       ArtificialSAMUtils.createArtificialRead(header, "goodRead", 0, contigEnd - 1, 2);

   // Two matched bases starting one position before the contig end.
   borderRead.setCigarString("2M");
   final boolean badWithoutDeletion =
       IndelRealigner.realignmentProducesBadAlignment(borderRead, contigEnd);
   Assert.assertEquals(badWithoutDeletion, false);

   // Same read with a deletion in the middle; presumably this runs past the contig end.
   borderRead.setCigarString("1M1D1M");
   final boolean badWithDeletion =
       IndelRealigner.realignmentProducesBadAlignment(borderRead, contigEnd);
   Assert.assertEquals(badWithDeletion, true);
 }
  /**
   * Checks plain read-group splitting: seven reads tagged with rg1 (reads 1, 3, 6, 7) or rg2 (reads
   * 2, 4, 5) are placed in a single pileup, and each read group's sub-pileup must hold exactly its
   * own reads in their original order.
   */
  @Test
  public void testSplitByReadGroup() {
    final SAMReadGroupRecord firstGroup = new SAMReadGroupRecord("rg1");
    final SAMReadGroupRecord secondGroup = new SAMReadGroupRecord("rg2");

    final SAMFileHeader samHeader = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
    samHeader.addReadGroup(firstGroup);
    samHeader.addReadGroup(secondGroup);

    final GATKSAMRecord r1 = ArtificialSAMUtils.createArtificialRead(samHeader, "read1", 0, 1, 10);
    r1.setAttribute("RG", firstGroup.getId());
    final GATKSAMRecord r2 = ArtificialSAMUtils.createArtificialRead(samHeader, "read2", 0, 1, 10);
    r2.setAttribute("RG", secondGroup.getId());
    final GATKSAMRecord r3 = ArtificialSAMUtils.createArtificialRead(samHeader, "read3", 0, 1, 10);
    r3.setAttribute("RG", firstGroup.getId());
    final GATKSAMRecord r4 = ArtificialSAMUtils.createArtificialRead(samHeader, "read4", 0, 1, 10);
    r4.setAttribute("RG", secondGroup.getId());
    final GATKSAMRecord r5 = ArtificialSAMUtils.createArtificialRead(samHeader, "read5", 0, 1, 10);
    r5.setAttribute("RG", secondGroup.getId());
    final GATKSAMRecord r6 = ArtificialSAMUtils.createArtificialRead(samHeader, "read6", 0, 1, 10);
    r6.setAttribute("RG", firstGroup.getId());
    final GATKSAMRecord r7 = ArtificialSAMUtils.createArtificialRead(samHeader, "read7", 0, 1, 10);
    r7.setAttribute("RG", firstGroup.getId());

    final ReadBackedPileup combined =
        new ReadBackedPileupImpl(
            null, Arrays.asList(r1, r2, r3, r4, r5, r6, r7), Arrays.asList(1, 1, 1, 1, 1, 1, 1));

    // rg1 should contain reads 1, 3, 6 and 7, in that order.
    final List<GATKSAMRecord> actualRg1 = combined.getPileupForReadGroup("rg1").getReads();
    Assert.assertEquals(actualRg1.size(), 4, "Wrong number of reads in read group rg1");
    final List<GATKSAMRecord> expectedRg1 = Arrays.asList(r1, r3, r6, r7);
    for (int idx = 0; idx < expectedRg1.size(); idx++) {
      final GATKSAMRecord expected = expectedRg1.get(idx);
      Assert.assertEquals(
          actualRg1.get(idx),
          expected,
          "Read " + expected.getReadName() + " should be in rg1 but isn't");
    }

    // rg2 should contain reads 2, 4 and 5, in that order.
    final List<GATKSAMRecord> actualRg2 = combined.getPileupForReadGroup("rg2").getReads();
    Assert.assertEquals(actualRg2.size(), 3, "Wrong number of reads in read group rg2");
    final List<GATKSAMRecord> expectedRg2 = Arrays.asList(r2, r4, r5);
    for (int idx = 0; idx < expectedRg2.size(); idx++) {
      final GATKSAMRecord expected = expectedRg2.get(idx);
      Assert.assertEquals(
          actualRg2.get(idx),
          expected,
          "Read " + expected.getReadName() + " should be in rg2 but isn't");
    }
  }
  /**
   * Checks sample lookup on a pileup built from a sample-to-pileup map: the pileup for "sample2"
   * must contain only the read registered under that sample, and lookups for sample names that were
   * never registered must return {@code null}.
   */
  @Test
  public void testGetPileupForSample() {
    final String firstSample = "sample1";
    final String secondSample = "sample2";

    final SAMReadGroupRecord firstGroup = new SAMReadGroupRecord("rg1");
    firstGroup.setSample(firstSample);
    final SAMReadGroupRecord secondGroup = new SAMReadGroupRecord("rg2");
    secondGroup.setSample(secondSample);

    final SAMFileHeader samHeader = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
    samHeader.addReadGroup(firstGroup);
    samHeader.addReadGroup(secondGroup);

    final GATKSAMRecord firstRead =
        ArtificialSAMUtils.createArtificialRead(samHeader, "read1", 0, 1, 10);
    firstRead.setAttribute("RG", firstGroup.getId());
    final GATKSAMRecord secondRead =
        ArtificialSAMUtils.createArtificialRead(samHeader, "read2", 0, 1, 10);
    secondRead.setAttribute("RG", secondGroup.getId());

    // One single-read pileup per sample.
    final ReadBackedPileupImpl firstSamplePileup =
        new ReadBackedPileupImpl(null, Collections.singletonList(firstRead), 0);
    final ReadBackedPileupImpl secondSamplePileup =
        new ReadBackedPileupImpl(null, Collections.singletonList(secondRead), 0);
    final Map<String, ReadBackedPileupImpl> bySample = new HashMap<String, ReadBackedPileupImpl>();
    bySample.put(firstSample, firstSamplePileup);
    bySample.put(secondSample, secondSamplePileup);

    final ReadBackedPileup combined = new ReadBackedPileupImpl(null, bySample);

    // A registered sample yields exactly its own read.
    final ReadBackedPileup found = combined.getPileupForSample(secondSample);
    Assert.assertEquals(
        found.getNumberOfElements(), 1, "Sample 2 pileup has wrong number of elements");
    Assert.assertEquals(found.getReads().get(0), secondRead, "Sample 2 pileup has incorrect read");

    // Unregistered sample names yield no pileup.
    final ReadBackedPileup forMissing = combined.getPileupForSample("missing");
    Assert.assertNull(forMissing, "Pileup for sample 'missing' should be null but isn't");

    final ReadBackedPileup forNotHere = combined.getPileupForSample("not here");
    Assert.assertNull(forNotHere, "Pileup for sample 'not here' should be null but isn't");
  }
  /**
   * Data-driven check of dangling-head recovery in a {@code ReadThreadingGraph}.
   *
   * <p>Builds a graph (kmer size 5) from a reference sequence and a single read, locates the one
   * non-reference source vertex, aligns it against the reference path, attempts the merge, and
   * finally counts the haplotype paths through the resulting sequence graph.
   *
   * @param ref reference sequence added to the graph
   * @param alt bases of the single read threaded through the graph
   * @param cigar expected string form of the cigar produced for the dangling head
   * @param shouldBeMerged whether the dangling head is expected to be merged back into the
   *     reference path
   */
  @Test(dataProvider = "DanglingHeads")
  public void testDanglingHeads(
      final String ref, final String alt, final String cigar, final boolean shouldBeMerged) {

    final int kmerSize = 5;

    // create the graph and populate it
    final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize);
    rtgraph.addSequence("ref", ref.getBytes(), true);
    // the read is all-matching ("<length>M") with a base quality of 30 at every position
    final GATKSAMRecord read =
        ArtificialSAMUtils.createArtificialRead(
            alt.getBytes(), Utils.dupBytes((byte) 30, alt.length()), alt.length() + "M");
    rtgraph.addRead(read);
    rtgraph.setMaxMismatchesInDanglingHead(10);
    rtgraph.buildGraphIfNecessary();

    // confirm that we have just a single dangling head
    MultiDeBruijnVertex altSource = null;
    for (final MultiDeBruijnVertex v : rtgraph.vertexSet()) {
      if (rtgraph.isSource(v) && !rtgraph.isReferenceNode(v)) {
        Assert.assertTrue(altSource == null, "We found more than one non-reference source");
        altSource = v;
      }
    }

    Assert.assertTrue(altSource != null, "We did not find a non-reference source");

    // confirm that the SW alignment agrees with our expectations
    final ReadThreadingGraph.DanglingChainMergeHelper result =
        rtgraph.generateCigarAgainstUpwardsReferencePath(altSource, 0, 1);

    // no alignment result is only acceptable when no merge was expected
    if (result == null) {
      Assert.assertFalse(shouldBeMerged);
      return;
    }

    Assert.assertTrue(
        cigar.equals(result.cigar.toString()), "SW generated cigar = " + result.cigar.toString());

    // confirm that the head merging works as expected: a positive merge result is required
    // whenever a merge is expected
    final int mergeResult = rtgraph.mergeDanglingHead(result);
    Assert.assertTrue(mergeResult > 0 || !shouldBeMerged);

    // confirm that we created the appropriate bubble in the graph only if expected
    // (two paths from reference source to reference sink when merged, otherwise one)
    rtgraph.cleanNonRefPaths();
    final SeqGraph seqGraph = rtgraph.convertToSequenceGraph();
    final List<KBestHaplotype> paths =
        new KBestHaplotypeFinder(
            seqGraph, seqGraph.getReferenceSourceVertex(), seqGraph.getReferenceSinkVertex());
    Assert.assertEquals(paths.size(), shouldBeMerged ? 2 : 1);
  }
    /**
     * Builds {@code n} pileup elements, each backed by a fresh three-base artificial read of all
     * 'A' bases with base quality 30 and a cigar of the form {@code 1M1<op>1M}.
     *
     * @param n number of pileup elements to create
     * @param mapq mapping quality assigned to every read
     * @param op cigar operator letter placed in the middle of the cigar
     * @return the created elements, each pointing at the read's middle cigar element
     */
    private List<PileupElement> makeReads(final int n, final int mapq, final String op) {
      final int readLength = 3;
      final List<PileupElement> elements = new LinkedList<PileupElement>();

      for (int created = 0; created < n; created++) {
        final GATKSAMRecord record =
            ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, readLength);
        record.setReadBases(Utils.dupBytes((byte) 'A', readLength));
        record.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
        record.setCigarString("1M1" + op + "1M");
        record.setMappingQuality(mapq);

        // Offset 1 is used only when the middle operator is a match; otherwise offset 0.
        final int baseOffset;
        if (op.equals("M")) {
          baseOffset = 1;
        } else {
          baseOffset = 0;
        }

        final CigarElement middleElement = record.getCigar().getCigarElement(1);
        final PileupElement element = new PileupElement(record, baseOffset, middleElement, 1, 0);
        elements.add(element);
      }

      return elements;
    }
  /**
   * Checks that a mapping quality of 200, which does not fit in a signed byte, is reported
   * unchanged by {@code getMappingQuals()} on a pileup built from a per-sample map.
   */
  @Test
  public void testRBPMappingQuals() {

    // Build a 10-base read whose mapping quality exceeds Byte.MAX_VALUE.
    final GATKSAMRecord highMqRead =
        ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, 10);
    highMqRead.setReadBases(Utils.dupBytes((byte) 'A', 10));
    highMqRead.setBaseQualities(Utils.dupBytes((byte) 30, 10));
    highMqRead.setCigarString("10M");
    highMqRead.setMappingQuality(200);

    // Wrap the read in a single-element pileup filed under sample "foo".
    final PileupElement onlyElement =
        new PileupElement(highMqRead, 0, highMqRead.getCigar().getCigarElement(0), 0, 0);
    final List<PileupElement> elements = new LinkedList<>();
    elements.add(onlyElement);

    final ReadBackedPileupImpl fooPileup = new ReadBackedPileupImpl(loc, elements);
    final Map<String, ReadBackedPileupImpl> bySample = new HashMap<>();
    bySample.put("foo", fooPileup);
    final ReadBackedPileup combined = new ReadBackedPileupImpl(loc, bySample);

    Assert.assertEquals(combined.getMappingQuals()[0], 200);
  }
  /**
   * Data-driven check of dangling-tail recovery in a {@code ReadThreadingGraph}.
   *
   * <p>Reference and alternate haplotypes share a fixed 40-base prefix and differ only in their
   * endings. The test builds a graph (kmer size 15) from both, locates the single non-reference
   * sink, aligns it against the reference path, checks the cigar and whether it is considered
   * mergeable, and, for good cigars, performs the merge and verifies where the new edge landed.
   *
   * @param refEnd suffix appended to the common prefix to form the reference haplotype
   * @param altEnd suffix appended to the common prefix to form the alternate haplotype
   * @param cigar expected string form of the cigar produced for the dangling tail
   * @param cigarIsGood whether the cigar is expected to be acceptable for merging
   * @param mergePointDistanceFromSink number of vertices to walk back from the alternate sink to
   *     reach the expected merge point; negative values skip the edge check, and -1 additionally
   *     waives the requirement that the merge result equal 1
   */
  @Test(dataProvider = "DanglingTails")
  public void testDanglingTails(
      final String refEnd,
      final String altEnd,
      final String cigar,
      final boolean cigarIsGood,
      final int mergePointDistanceFromSink) {

    final int kmerSize = 15;

    // construct the haplotypes
    final String commonPrefix = "AAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTTT";
    final String ref = commonPrefix + refEnd;
    final String alt = commonPrefix + altEnd;

    // create the graph and populate it
    final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize);
    rtgraph.addSequence("ref", ref.getBytes(), true);
    // the read is all-matching ("<length>M") with a base quality of 30 at every position
    final GATKSAMRecord read =
        ArtificialSAMUtils.createArtificialRead(
            alt.getBytes(), Utils.dupBytes((byte) 30, alt.length()), alt.length() + "M");
    rtgraph.addRead(read);
    rtgraph.buildGraphIfNecessary();

    // confirm that we have just a single dangling tail
    MultiDeBruijnVertex altSink = null;
    for (final MultiDeBruijnVertex v : rtgraph.vertexSet()) {
      if (rtgraph.isSink(v) && !rtgraph.isReferenceNode(v)) {
        Assert.assertTrue(altSink == null, "We found more than one non-reference sink");
        altSink = v;
      }
    }

    Assert.assertTrue(altSink != null, "We did not find a non-reference sink");

    // confirm that the SW alignment agrees with our expectations
    final ReadThreadingGraph.DanglingChainMergeHelper result =
        rtgraph.generateCigarAgainstDownwardsReferencePath(altSink, 0, 4);

    // no alignment result is only acceptable when the cigar was not expected to be good
    if (result == null) {
      Assert.assertFalse(cigarIsGood);
      return;
    }

    Assert.assertTrue(
        cigar.equals(result.cigar.toString()), "SW generated cigar = " + result.cigar.toString());

    // confirm that the goodness of the cigar agrees with our expectations
    Assert.assertEquals(rtgraph.cigarIsOkayToMerge(result.cigar, false, true), cigarIsGood);

    // confirm that the tail merging works as expected
    if (cigarIsGood) {
      final int mergeResult = rtgraph.mergeDanglingTail(result);
      Assert.assertTrue(mergeResult == 1 || mergePointDistanceFromSink == -1);

      // confirm that we created the appropriate edge
      if (mergePointDistanceFromSink >= 0) {
        // walk back from the alternate sink along single incoming edges to the merge point
        MultiDeBruijnVertex v = altSink;
        for (int i = 0; i < mergePointDistanceFromSink; i++) {
          if (rtgraph.inDegreeOf(v) != 1) Assert.fail("Encountered vertex with multiple edges");
          v = rtgraph.getEdgeSource(rtgraph.incomingEdgeOf(v));
        }
        // the merge point must have more than one outgoing edge after the merge
        Assert.assertTrue(rtgraph.outDegreeOf(v) > 1);
      }
    }
  }
Ejemplo n.º 10
0
 /**
  * Class-level fixture: creates the artificial SAM header, a {@code GenomeLocParser} over that
  * header's sequence dictionary, and a location at position 1 of "chr1", and stores them in the
  * {@code header}, {@code genomeLocParser} and {@code loc} fields.
  */
 @BeforeClass
 public void beforeClass() {
   header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
   // the parser must be built from the header's dictionary before any location can be created
   genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
   loc = genomeLocParser.createGenomeLoc("chr1", 1);
 }
Ejemplo n.º 11
0
 /**
  * Class-level fixture: opens the reference FASTA at {@code b37KGReference} and stores an
  * artificial SAM header built from its sequence dictionary in the {@code header} field.
  *
  * @throws FileNotFoundException presumably when the reference file cannot be found; confirm
  *     against {@code CachingIndexedFastaSequenceFile}
  */
 @BeforeClass
 public void setup() throws FileNotFoundException {
   // NOTE(review): the sequence file is not closed here; confirm whether that is intentional
   final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
   header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
 }