/** * Generates tab delimited string containing details about the passed SAMReadGroupRecord * * @param readGroupRecord record * @return tab delimited string containing details about the SAMReadGroupRecord */ private String getReadGroupDetails(final SAMReadGroupRecord readGroupRecord) { final List<String> elements = new ArrayList<>(5); final String tmp[] = readGroupRecord .getPlatformUnit() .split("\\."); // Expect to look like: D047KACXX110901.1.ACCAACTG String runBarcode = "?"; String lane = "?"; String molBarcode = "?"; if ((tmp.length == 3) || (tmp.length == 2)) { runBarcode = tmp[0]; lane = tmp[1]; molBarcode = (tmp.length == 3) ? tmp[2] : ""; // In older BAMS there may be no molecular barcode sequence } else { log.error("Unexpected format " + readGroupRecord.getPlatformUnit() + " for PU attribute"); } elements.add(runBarcode); elements.add(lane); elements.add(molBarcode); elements.add(readGroupRecord.getLibrary()); elements.add(readGroupRecord.getSample()); return String.join("\t", elements); }
@Override protected void setup(final SAMFileHeader header, final File samFile) { final String outext = (null != FILE_EXTENSION) ? FILE_EXTENSION : ""; // Add a file extension if desired preAdapterSummaryOut = new File(OUTPUT + SequencingArtifactMetrics.PRE_ADAPTER_SUMMARY_EXT + outext); preAdapterDetailsOut = new File(OUTPUT + SequencingArtifactMetrics.PRE_ADAPTER_DETAILS_EXT + outext); baitBiasSummaryOut = new File(OUTPUT + SequencingArtifactMetrics.BAIT_BIAS_SUMMARY_EXT + outext); baitBiasDetailsOut = new File(OUTPUT + SequencingArtifactMetrics.BAIT_BIAS_DETAILS_EXT + outext); IOUtil.assertFileIsWritable(preAdapterSummaryOut); IOUtil.assertFileIsWritable(preAdapterDetailsOut); IOUtil.assertFileIsWritable(baitBiasSummaryOut); IOUtil.assertFileIsWritable(baitBiasDetailsOut); for (final SAMReadGroupRecord rec : header.getReadGroups()) { samples.add(getOrElse(rec.getSample(), UNKNOWN_SAMPLE)); libraries.add(getOrElse(rec.getLibrary(), UNKNOWN_LIBRARY)); } if (INTERVALS != null) { IOUtil.assertFileIsReadable(INTERVALS); intervalMask = new IntervalListReferenceSequenceMask(IntervalList.fromFile(INTERVALS).uniqued()); } if (DB_SNP != null) { IOUtil.assertFileIsReadable(DB_SNP); dbSnpMask = new DbSnpBitSetUtil(DB_SNP, header.getSequenceDictionary()); } // set record-level filters final List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>(); filters.add(new FailsVendorReadQualityFilter()); filters.add(new NotPrimaryAlignmentFilter()); filters.add(new DuplicateReadFilter()); filters.add(new AlignedFilter(true)); // discard unmapped reads filters.add(new MappingQualityFilter(MINIMUM_MAPPING_QUALITY)); if (!INCLUDE_UNPAIRED) { final int effectiveMaxInsertSize = (MAXIMUM_INSERT_SIZE == 0) ? 
Integer.MAX_VALUE : MAXIMUM_INSERT_SIZE; filters.add(new InsertSizeFilter(MINIMUM_INSERT_SIZE, effectiveMaxInsertSize)); } recordFilter = new AggregateFilter(filters); // set up the artifact counters final String sampleAlias = StringUtil.join(",", new ArrayList<String>(samples)); for (final String library : libraries) { artifactCounters.put( library, new ArtifactCounter(sampleAlias, library, CONTEXT_SIZE, TANDEM_READS)); } }
/**
 * Cross-checks fingerprints at the library level.
 *
 * <p>Each read group's fingerprint is copied into a new fingerprint named {@code
 * sample::library}, so read groups belonging to the same sample and library share a name. The
 * renamed fingerprints are then passed to {@code crossCheckSamples}, which presumably combines
 * same-named fingerprints and produces the matrix of LOD scores comparing every library with
 * every other library (confirm against that method).
 *
 * @param fingerprints fingerprint evidence keyed by read group
 * @param out stream forwarded to {@code crossCheckSamples}
 */
private void crossCheckLibraries(
    final Map<SAMReadGroupRecord, Fingerprint> fingerprints, final PrintStream out) {
  final List<Fingerprint> libraryFingerprints = new ArrayList<>();

  for (final Map.Entry<SAMReadGroupRecord, Fingerprint> entry : fingerprints.entrySet()) {
    final SAMReadGroupRecord readGroup = entry.getKey();
    final Fingerprint original = entry.getValue();

    // Same source and info as the original; only the name changes
    final String libraryName = readGroup.getSample() + "::" + readGroup.getLibrary();
    final Fingerprint renamed =
        new Fingerprint(libraryName, original.getSource(), original.getInfo());
    renamed.putAll(original);

    libraryFingerprints.add(renamed);
  }

  crossCheckSamples(libraryFingerprints, out);
}
/**
 * Looks up the library recorded on the read group that the given read belongs to.
 *
 * @param read read whose library to retrieve
 * @param header SAM header containing read groups
 * @return the library of the read's read group as a String, or null when no read group record is
 *     found for the read
 */
public static String getLibrary(final GATKRead read, final SAMFileHeader header) {
  final SAMReadGroupRecord group = getSAMReadGroupRecord(read, header);
  if (group == null) {
    return null;
  }
  return group.getLibrary();
}
@Override public void onTraversalDone(Integer sum) { final GATKReport report = new GATKReport(); report.addTable(TABLE_NAME, "Table of read group properties", 12); GATKReportTable table = report.getTable(TABLE_NAME); DateFormat dateFormatter = DateFormat.getDateInstance(DateFormat.SHORT); table.addColumn("readgroup"); // * Emits a GATKReport containing read group, sample, library, platform, center, median insert // size and // * median read length for each read group in every BAM file. table.addColumn("sample", "%s"); table.addColumn("library", "%s"); table.addColumn("platform", "%s"); table.addColumn("center", "%s"); table.addColumn("date", "%s"); table.addColumn("has.any.reads"); table.addColumn("is.paired.end"); table.addColumn("n.reads.analyzed", "%d"); table.addColumn("simple.read.type", "%s"); table.addColumn("median.read.length"); table.addColumn("median.insert.size"); for (final SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups()) { final String rgID = rg.getId(); table.addRowID(rgID, true); PerReadGroupInfo info = readGroupInfo.get(rgID); // we are paired if > 25% of reads are paired final boolean isPaired = info.nReadsPaired / (1.0 * (info.nReadsSeen + 1)) > 0.25; final boolean hasAnyReads = info.nReadsSeen > 0; final int readLength = info.readLength.getMedian(0); setTableValue(table, rgID, "sample", rg.getSample()); setTableValue(table, rgID, "library", rg.getLibrary()); setTableValue(table, rgID, "platform", rg.getPlatform()); setTableValue(table, rgID, "center", rg.getSequencingCenter()); try { setTableValue( table, rgID, "date", rg.getRunDate() != null ? 
dateFormatter.format(rg.getRunDate()) : "NA"); } catch (NullPointerException e) { // TODO: remove me when bug in Picard is fixed that causes NPE when date isn't present setTableValue(table, rgID, "date", "NA"); } setTableValue(table, rgID, "has.any.reads", hasAnyReads); setTableValue(table, rgID, "is.paired.end", isPaired); setTableValue(table, rgID, "n.reads.analyzed", info.nReadsSeen); setTableValue( table, rgID, "simple.read.type", hasAnyReads ? String.format("%dx%d", isPaired ? 2 : 1, readLength) : "NA"); setTableValue(table, rgID, "median.read.length", hasAnyReads ? readLength : "NA"); setTableValue( table, rgID, "median.insert.size", hasAnyReads && isPaired ? info.insertSize.getMedian(0) : "NA"); } report.print(out); }