/** Ensure that splitting read groups still works when dealing with a sample-split pileup. */
  @Test
  public void testSplitBySample() {
    // Two read groups, each belonging to a different sample.
    SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
    readGroupOne.setSample("sample1");
    SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2");
    readGroupTwo.setSample("sample2");

    SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
    header.addReadGroup(readGroupOne);
    header.addReadGroup(readGroupTwo);

    // read1 and read3 are tagged rg1; read2 and read4 are tagged rg2.
    GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, 10);
    read1.setAttribute("RG", readGroupOne.getId());
    GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 1, 10);
    read2.setAttribute("RG", readGroupTwo.getId());
    GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header, "read3", 0, 1, 10);
    read3.setAttribute("RG", readGroupOne.getId());
    GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header, "read4", 0, 1, 10);
    read4.setAttribute("RG", readGroupTwo.getId());

    // Build one pileup per sample, then combine them into a single sample-keyed pileup.
    ReadBackedPileupImpl sample1Pileup =
        new ReadBackedPileupImpl(null, Arrays.asList(read1, read3), Arrays.asList(1, 1));
    ReadBackedPileupImpl sample2Pileup =
        new ReadBackedPileupImpl(null, Arrays.asList(read2, read4), Arrays.asList(1, 1));
    Map<String, ReadBackedPileupImpl> sampleToPileupMap =
        new HashMap<String, ReadBackedPileupImpl>();
    sampleToPileupMap.put(readGroupOne.getSample(), sample1Pileup);
    sampleToPileupMap.put(readGroupTwo.getSample(), sample2Pileup);

    ReadBackedPileup compositePileup = new ReadBackedPileupImpl(null, sampleToPileupMap);

    ReadBackedPileup rg1Pileup = compositePileup.getPileupForReadGroup("rg1");
    List<GATKSAMRecord> rg1Reads = rg1Pileup.getReads();

    Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg1");
    Assert.assertEquals(
        rg1Reads.get(0), read1, "Read " + read1.getReadName() + " should be in rg1 but isn't");
    Assert.assertEquals(
        rg1Reads.get(1), read3, "Read " + read3.getReadName() + " should be in rg1 but isn't");

    ReadBackedPileup rg2Pileup = compositePileup.getPileupForReadGroup("rg2");
    List<GATKSAMRecord> rg2Reads = rg2Pileup.getReads();

    // Check the rg2 list here (not rg1Reads) so the rg2 count is actually verified.
    Assert.assertEquals(rg2Reads.size(), 2, "Wrong number of reads in read group rg2");
    Assert.assertEquals(
        rg2Reads.get(0), read2, "Read " + read2.getReadName() + " should be in rg2 but isn't");
    Assert.assertEquals(
        rg2Reads.get(1), read4, "Read " + read4.getReadName() + " should be in rg2 but isn't");
  }
  /**
   * Checks that a sample-keyed pileup returns the right sub-pileup when queried by sample name, and
   * that querying a sample name that was never added yields null.
   */
  @Test
  public void testGetPileupForSample() {
    final String firstSample = "sample1";
    final String secondSample = "sample2";

    final SAMReadGroupRecord firstGroup = new SAMReadGroupRecord("rg1");
    final SAMReadGroupRecord secondGroup = new SAMReadGroupRecord("rg2");
    firstGroup.setSample(firstSample);
    secondGroup.setSample(secondSample);

    final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
    header.addReadGroup(firstGroup);
    header.addReadGroup(secondGroup);

    // One read per read group, and therefore one read per sample.
    final GATKSAMRecord firstRead =
        ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, 10);
    firstRead.setAttribute("RG", firstGroup.getId());
    final GATKSAMRecord secondRead =
        ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 1, 10);
    secondRead.setAttribute("RG", secondGroup.getId());

    final ReadBackedPileupImpl firstSamplePileup =
        new ReadBackedPileupImpl(null, Collections.singletonList(firstRead), 0);
    final ReadBackedPileupImpl secondSamplePileup =
        new ReadBackedPileupImpl(null, Collections.singletonList(secondRead), 0);

    final Map<String, ReadBackedPileupImpl> pileupsBySample =
        new HashMap<String, ReadBackedPileupImpl>();
    pileupsBySample.put(firstSample, firstSamplePileup);
    pileupsBySample.put(secondSample, secondSamplePileup);

    final ReadBackedPileup combined = new ReadBackedPileupImpl(null, pileupsBySample);

    // A known sample resolves to its own single-read pileup.
    final ReadBackedPileup forSecondSample = combined.getPileupForSample(secondSample);
    Assert.assertEquals(
        forSecondSample.getNumberOfElements(), 1, "Sample 2 pileup has wrong number of elements");
    final GATKSAMRecord onlyRead = forSecondSample.getReads().get(0);
    Assert.assertEquals(onlyRead, secondRead, "Sample 2 pileup has incorrect read");

    // Sample names that were never registered must come back as null.
    Assert.assertNull(
        combined.getPileupForSample("missing"),
        "Pileup for sample 'missing' should be null but isn't");
    Assert.assertNull(
        combined.getPileupForSample("not here"),
        "Pileup for sample 'not here' should be null but isn't");
  }
  /**
   * Ensure that basic read group splitting works: a flat pileup of seven reads tagged with two
   * read groups is split into per-read-group pileups that keep the reads in their original order.
   */
  @Test
  public void testSplitByReadGroup() {
    final SAMReadGroupRecord firstGroup = new SAMReadGroupRecord("rg1");
    final SAMReadGroupRecord secondGroup = new SAMReadGroupRecord("rg2");

    final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
    header.addReadGroup(firstGroup);
    header.addReadGroup(secondGroup);

    // Read group assigned to read1..read7, in order: rg1, rg2, rg1, rg2, rg2, rg1, rg1.
    final String[] groupIdByRead = {
      firstGroup.getId(),
      secondGroup.getId(),
      firstGroup.getId(),
      secondGroup.getId(),
      secondGroup.getId(),
      firstGroup.getId(),
      firstGroup.getId()
    };
    final GATKSAMRecord[] reads = new GATKSAMRecord[groupIdByRead.length];
    for (int i = 0; i < reads.length; i++) {
      reads[i] = ArtificialSAMUtils.createArtificialRead(header, "read" + (i + 1), 0, 1, 10);
      reads[i].setAttribute("RG", groupIdByRead[i]);
    }

    final ReadBackedPileup pileup =
        new ReadBackedPileupImpl(null, Arrays.asList(reads), Arrays.asList(1, 1, 1, 1, 1, 1, 1));

    // rg1 should hold read1, read3, read6 and read7 (array indices 0, 2, 5, 6), in that order.
    final List<GATKSAMRecord> rg1Reads = pileup.getPileupForReadGroup("rg1").getReads();
    Assert.assertEquals(rg1Reads.size(), 4, "Wrong number of reads in read group rg1");
    final int[] expectedInRg1 = {0, 2, 5, 6};
    for (int slot = 0; slot < expectedInRg1.length; slot++) {
      final GATKSAMRecord expected = reads[expectedInRg1[slot]];
      Assert.assertEquals(
          rg1Reads.get(slot),
          expected,
          "Read " + expected.getReadName() + " should be in rg1 but isn't");
    }

    // rg2 should hold read2, read4 and read5 (array indices 1, 3, 4), in that order.
    final List<GATKSAMRecord> rg2Reads = pileup.getPileupForReadGroup("rg2").getReads();
    Assert.assertEquals(rg2Reads.size(), 3, "Wrong number of reads in read group rg2");
    final int[] expectedInRg2 = {1, 3, 4};
    for (int slot = 0; slot < expectedInRg2.length; slot++) {
      final GATKSAMRecord expected = reads[expectedInRg2[slot]];
      Assert.assertEquals(
          rg2Reads.get(slot),
          expected,
          "Read " + expected.getReadName() + " should be in rg2 but isn't");
    }
  }
Example #4
0
 /**
  * Returns the identifier to use for a read group: its platform unit when one is set, otherwise
  * its read group ID.
  *
  * @param rg the read group record to identify
  * @return the platform unit if non-null, else the value of {@code rg.getId()}
  */
 public static String getID(final SAMReadGroupRecord rg) {
   final String platformUnit = rg.getPlatformUnit();
   if (platformUnit != null) {
     return platformUnit;
   }
   return rg.getId();
 }
  /**
   * Emits a GATKReport with one row per read group in the SAM file header. Each row carries the
   * read group's sample, library, platform, center and run date, plus the statistics accumulated
   * for it: whether any reads were seen, whether it looks paired-end, the number of reads
   * analyzed, a simple read-type label, the median read length and the median insert size.
   *
   * @param sum the traversal result; not used by this method
   */
  @Override
  public void onTraversalDone(Integer sum) {
    final DateFormat shortDate = DateFormat.getDateInstance(DateFormat.SHORT);

    final GATKReport report = new GATKReport();
    report.addTable(TABLE_NAME, "Table of read group properties", 12);
    final GATKReportTable table = report.getTable(TABLE_NAME);

    // Twelve columns, matching the column count passed to addTable above.
    table.addColumn("readgroup");
    table.addColumn("sample", "%s");
    table.addColumn("library", "%s");
    table.addColumn("platform", "%s");
    table.addColumn("center", "%s");
    table.addColumn("date", "%s");
    table.addColumn("has.any.reads");
    table.addColumn("is.paired.end");
    table.addColumn("n.reads.analyzed", "%d");
    table.addColumn("simple.read.type", "%s");
    table.addColumn("median.read.length");
    table.addColumn("median.insert.size");

    for (final SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups()) {
      final String rgID = rg.getId();
      table.addRowID(rgID, true);
      final PerReadGroupInfo stats = readGroupInfo.get(rgID);

      // A read group counts as paired when more than 25% of its reads are paired; the +1 keeps
      // the denominator non-zero when no reads were seen.
      final double pairedFraction = stats.nReadsPaired / (1.0 * (stats.nReadsSeen + 1));
      final boolean isPaired = pairedFraction > 0.25;
      final boolean hasAnyReads = stats.nReadsSeen > 0;
      final int medianReadLength = stats.readLength.getMedian(0);

      // Header-derived metadata.
      setTableValue(table, rgID, "sample", rg.getSample());
      setTableValue(table, rgID, "library", rg.getLibrary());
      setTableValue(table, rgID, "platform", rg.getPlatform());
      setTableValue(table, rgID, "center", rg.getSequencingCenter());
      try {
        final String runDate =
            rg.getRunDate() != null ? shortDate.format(rg.getRunDate()) : "NA";
        setTableValue(table, rgID, "date", runDate);
      } catch (NullPointerException ignored) {
        // TODO: remove me when bug in Picard is fixed that causes NPE when date isn't present
        setTableValue(table, rgID, "date", "NA");
      }

      // Statistics gathered during traversal; "NA" wherever there is nothing to summarize.
      setTableValue(table, rgID, "has.any.reads", hasAnyReads);
      setTableValue(table, rgID, "is.paired.end", isPaired);
      setTableValue(table, rgID, "n.reads.analyzed", stats.nReadsSeen);

      final String readTypeLabel =
          hasAnyReads ? String.format("%dx%d", isPaired ? 2 : 1, medianReadLength) : "NA";
      setTableValue(table, rgID, "simple.read.type", readTypeLabel);
      setTableValue(table, rgID, "median.read.length", hasAnyReads ? medianReadLength : "NA");
      setTableValue(
          table,
          rgID,
          "median.insert.size",
          hasAnyReads && isPaired ? stats.insertSize.getMedian(0) : "NA");
    }

    report.print(out);
  }
 /**
  * Registers a fresh {@code PerReadGroupInfo} in {@code readGroupInfo} for every read group
  * listed in the SAM file header, keyed by the read group's ID.
  */
 @Override
 public void initialize() {
   for (final SAMReadGroupRecord readGroup : getToolkit().getSAMFileHeader().getReadGroups()) {
     final String readGroupId = readGroup.getId();
     readGroupInfo.put(readGroupId, new PerReadGroupInfo());
   }
 }