/**
   * Generates a tab-delimited string describing the passed SAMReadGroupRecord.
   *
   * <p>The fields are, in order: run barcode, lane, molecular barcode, library, sample. The first
   * three are parsed from the platform unit (PU), which is expected to look like
   * {@code D047KACXX110901.1.ACCAACTG}. When the PU has only two dot-separated parts, the molecular
   * barcode is emitted as an empty string. When the PU is missing or has any other number of
   * parts, an error is logged and all three PU-derived fields are emitted as {@code "?"}.
   *
   * @param readGroupRecord record
   * @return tab-delimited string containing details about the SAMReadGroupRecord
   */
  private String getReadGroupDetails(final SAMReadGroupRecord readGroupRecord) {
    final String platformUnit = readGroupRecord.getPlatformUnit();

    // The PU attribute may be absent; treat that like any other unparseable value instead of
    // failing with a NullPointerException on split().
    final String[] puParts = (platformUnit == null) ? new String[0] : platformUnit.split("\\.");

    String runBarcode = "?";
    String lane = "?";
    String molBarcode = "?";
    if ((puParts.length == 3) || (puParts.length == 2)) {
      runBarcode = puParts[0];
      lane = puParts[1];
      // In older BAMs there may be no molecular barcode sequence
      molBarcode = (puParts.length == 3) ? puParts[2] : "";
    } else {
      log.error("Unexpected format " + platformUnit + " for PU attribute");
    }

    return String.join(
        "\t",
        runBarcode,
        lane,
        molBarcode,
        readGroupRecord.getLibrary(),
        readGroupRecord.getSample());
  }
  /**
   * Prepares output files, optional masks, the record filter and the per-library artifact
   * counters.
   *
   * <p>Output file names are built from {@code OUTPUT}, the matching
   * {@code SequencingArtifactMetrics} extension constant and, when set, {@code FILE_EXTENSION}.
   *
   * @param header header whose read groups and sequence dictionary are consulted
   * @param samFile not used by this method
   */
  @Override
  protected void setup(final SAMFileHeader header, final File samFile) {
    // Add a file extension if desired
    final String suffix;
    if (FILE_EXTENSION == null) {
      suffix = "";
    } else {
      suffix = FILE_EXTENSION;
    }

    preAdapterSummaryOut =
        new File(OUTPUT + SequencingArtifactMetrics.PRE_ADAPTER_SUMMARY_EXT + suffix);
    preAdapterDetailsOut =
        new File(OUTPUT + SequencingArtifactMetrics.PRE_ADAPTER_DETAILS_EXT + suffix);
    baitBiasSummaryOut =
        new File(OUTPUT + SequencingArtifactMetrics.BAIT_BIAS_SUMMARY_EXT + suffix);
    baitBiasDetailsOut =
        new File(OUTPUT + SequencingArtifactMetrics.BAIT_BIAS_DETAILS_EXT + suffix);

    // Fail early if any of the four outputs cannot be written.
    IOUtil.assertFileIsWritable(preAdapterSummaryOut);
    IOUtil.assertFileIsWritable(preAdapterDetailsOut);
    IOUtil.assertFileIsWritable(baitBiasSummaryOut);
    IOUtil.assertFileIsWritable(baitBiasDetailsOut);

    // Collect sample and library names, substituting placeholders for missing values.
    for (final SAMReadGroupRecord readGroup : header.getReadGroups()) {
      samples.add(getOrElse(readGroup.getSample(), UNKNOWN_SAMPLE));
      libraries.add(getOrElse(readGroup.getLibrary(), UNKNOWN_LIBRARY));
    }

    // Optional interval restriction.
    if (INTERVALS != null) {
      IOUtil.assertFileIsReadable(INTERVALS);
      intervalMask =
          new IntervalListReferenceSequenceMask(IntervalList.fromFile(INTERVALS).uniqued());
    }

    // Optional dbSNP mask.
    if (DB_SNP != null) {
      IOUtil.assertFileIsReadable(DB_SNP);
      dbSnpMask = new DbSnpBitSetUtil(DB_SNP, header.getSequenceDictionary());
    }

    // Record-level filters, combined into a single aggregate filter.
    final List<SamRecordFilter> recordFilters = new ArrayList<>();
    recordFilters.add(new FailsVendorReadQualityFilter());
    recordFilters.add(new NotPrimaryAlignmentFilter());
    recordFilters.add(new DuplicateReadFilter());
    recordFilters.add(new AlignedFilter(true)); // discard unmapped reads
    recordFilters.add(new MappingQualityFilter(MINIMUM_MAPPING_QUALITY));
    if (!INCLUDE_UNPAIRED) {
      // A configured maximum of 0 means "no upper bound".
      int maxInsertSize = MAXIMUM_INSERT_SIZE;
      if (maxInsertSize == 0) {
        maxInsertSize = Integer.MAX_VALUE;
      }
      recordFilters.add(new InsertSizeFilter(MINIMUM_INSERT_SIZE, maxInsertSize));
    }
    recordFilter = new AggregateFilter(recordFilters);

    // One artifact counter per library, all sharing the comma-joined sample names as alias.
    final String sampleAlias = StringUtil.join(",", new ArrayList<String>(samples));
    for (final String libraryName : libraries) {
      final ArtifactCounter counter =
          new ArtifactCounter(sampleAlias, libraryName, CONTEXT_SIZE, TANDEM_READS);
      artifactCounters.put(libraryName, counter);
    }
  }
  /**
   * Re-labels every read group fingerprint as {@code "<sample>::<library>"} and hands the
   * re-labelled fingerprints to {@link #crossCheckSamples}, which writes its results to
   * {@code out}.
   *
   * <p>Each new fingerprint keeps the source and info of the original and receives all of its
   * entries via {@code putAll}.
   *
   * @param fingerprints fingerprints keyed by the read group they were generated from
   * @param out stream passed through to {@code crossCheckSamples}
   */
  private void crossCheckLibraries(
      final Map<SAMReadGroupRecord, Fingerprint> fingerprints, final PrintStream out) {
    final List<Fingerprint> relabelled = new ArrayList<>();

    for (final Map.Entry<SAMReadGroupRecord, Fingerprint> entry : fingerprints.entrySet()) {
      final SAMReadGroupRecord readGroup = entry.getKey();
      final Fingerprint original = entry.getValue();

      final String label = readGroup.getSample() + "::" + readGroup.getLibrary();
      final Fingerprint copy = new Fingerprint(label, original.getSource(), original.getInfo());
      copy.putAll(original);
      relabelled.add(copy);
    }

    crossCheckSamples(relabelled, out);
  }
// Example #4
// 0
 /**
  * Looks up the library of the read group that the given read belongs to.
  *
  * @param read read whose library to retrieve
  * @param header SAM header containing read groups
  * @return the library of the read's read group, or null when no read group record is found for
  *     the read
  */
 public static String getLibrary(final GATKRead read, final SAMFileHeader header) {
   final SAMReadGroupRecord record = getSAMReadGroupRecord(read, header);
   if (record == null) {
     return null;
   }
   return record.getLibrary();
 }
  /**
   * Emits a GATKReport with one row per read group in the toolkit's SAM file header and prints it
   * to {@code out}.
   *
   * <p>Columns: read group, sample, library, platform, center, date, whether any reads were seen,
   * whether the read group is considered paired-end, number of reads analyzed, a simple read type
   * string, median read length and median insert size.
   *
   * @param sum not used by this method
   */
  @Override
  public void onTraversalDone(Integer sum) {
    final GATKReport gatkReport = new GATKReport();
    gatkReport.addTable(TABLE_NAME, "Table of read group properties", 12);
    GATKReportTable rgTable = gatkReport.getTable(TABLE_NAME);
    DateFormat shortDate = DateFormat.getDateInstance(DateFormat.SHORT);

    // Column order defines the layout of the report.
    rgTable.addColumn("readgroup");
    rgTable.addColumn("sample", "%s");
    rgTable.addColumn("library", "%s");
    rgTable.addColumn("platform", "%s");
    rgTable.addColumn("center", "%s");
    rgTable.addColumn("date", "%s");
    rgTable.addColumn("has.any.reads");
    rgTable.addColumn("is.paired.end");
    rgTable.addColumn("n.reads.analyzed", "%d");
    rgTable.addColumn("simple.read.type", "%s");
    rgTable.addColumn("median.read.length");
    rgTable.addColumn("median.insert.size");

    for (final SAMReadGroupRecord readGroup : getToolkit().getSAMFileHeader().getReadGroups()) {
      final String id = readGroup.getId();
      rgTable.addRowID(id, true);
      PerReadGroupInfo stats = readGroupInfo.get(id);

      // A read group counts as paired when more than 25% of its reads are paired; the +1 keeps
      // the denominator non-zero when no reads were seen.
      final double pairedFraction = stats.nReadsPaired / (1.0 * (stats.nReadsSeen + 1));
      final boolean isPaired = pairedFraction > 0.25;
      final boolean hasAnyReads = stats.nReadsSeen > 0;
      final int readLength = stats.readLength.getMedian(0);

      setTableValue(rgTable, id, "sample", readGroup.getSample());
      setTableValue(rgTable, id, "library", readGroup.getLibrary());
      setTableValue(rgTable, id, "platform", readGroup.getPlatform());
      setTableValue(rgTable, id, "center", readGroup.getSequencingCenter());
      try {
        if (readGroup.getRunDate() == null) {
          setTableValue(rgTable, id, "date", "NA");
        } else {
          setTableValue(rgTable, id, "date", shortDate.format(readGroup.getRunDate()));
        }
      } catch (NullPointerException e) {
        // TODO: remove me when bug in Picard is fixed that causes NPE when date isn't present
        setTableValue(rgTable, id, "date", "NA");
      }
      setTableValue(rgTable, id, "has.any.reads", hasAnyReads);
      setTableValue(rgTable, id, "is.paired.end", isPaired);
      setTableValue(rgTable, id, "n.reads.analyzed", stats.nReadsSeen);
      // Read type is "<1|2>x<median read length>", e.g. 2x101 for paired reads.
      setTableValue(
          rgTable,
          id,
          "simple.read.type",
          hasAnyReads ? String.format("%dx%d", isPaired ? 2 : 1, readLength) : "NA");
      setTableValue(rgTable, id, "median.read.length", hasAnyReads ? readLength : "NA");
      // Insert size is only reported for paired read groups that have reads.
      setTableValue(
          rgTable,
          id,
          "median.insert.size",
          hasAnyReads && isPaired ? stats.insertSize.getMedian(0) : "NA");
    }

    gatkReport.print(out);
  }