/**
 * Prints the summary statistics of the histogram followed by its snapshot
 * percentiles, one value per line, using the reporter's configured locale.
 */
@Override
public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
    final Snapshot snapshot = histogram.getSnapshot();
    // Each row pairs a printf template with the value to render; iterating the
    // table in order keeps the output line ordering fixed.
    final Object[][] rows = {
        {" min = %2.2f\n", histogram.getMin()},
        {" max = %2.2f\n", histogram.getMax()},
        {" mean = %2.2f\n", histogram.getMean()},
        {" stddev = %2.2f\n", histogram.getStdDev()},
        {" median = %2.2f\n", snapshot.getMedian()},
        {" 75%% <= %2.2f\n", snapshot.get75thPercentile()},
        {" 95%% <= %2.2f\n", snapshot.get95thPercentile()},
        {" 98%% <= %2.2f\n", snapshot.get98thPercentile()},
        {" 99%% <= %2.2f\n", snapshot.get99thPercentile()},
        {" 99.9%% <= %2.2f\n", snapshot.get999thPercentile()}
    };
    for (final Object[] row : rows) {
        stream.printf(locale, (String) row[0], row[1]);
    }
}
/**
 * Walks every locus of the reference sequence, accumulates a read-depth histogram
 * (capped at COVERAGE_CAP) and a base-quality histogram, derives the summary WGS
 * metrics from them, and writes the metrics (plus histogram(s)) to OUTPUT.
 *
 * @return 0 on success
 */
@Override
protected int doWork() {
    // Fail fast on unusable inputs/outputs before doing any work.
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);

    // Setup all the inputs
    final ProgressLogger progress = new ProgressLogger(log, 10000000, "Processed", "loci");
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final SamReader in =
        SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final SamLocusIterator iterator = getLocusIterator(in);

    // Counting filters tally how many bases they exclude; the tallies are read back
    // below via getBasesExcludedBy(...) for the PCT_EXC_* metrics.
    final List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>();
    final CountingFilter dupeFilter = new CountingDuplicateFilter();
    final CountingFilter mapqFilter = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
    final CountingPairedFilter pairFilter = new CountingPairedFilter();
    filters.add(mapqFilter);
    filters.add(dupeFilter);
    if (!COUNT_UNPAIRED) {
        filters.add(pairFilter);
    }
    // Not a counting filter because we never want to count reads twice
    filters.add(new SecondaryAlignmentFilter());
    iterator.setSamFilters(filters);
    iterator.setEmitUncoveredLoci(true);
    iterator.setMappingQualityScoreCutoff(0); // Handled separately because we want to count bases
    iterator.setQualityScoreCutoff(0); // Handled separately because we want to count bases
    iterator.setIncludeNonPfReads(false);

    final int max = COVERAGE_CAP;
    // Index = observed depth (0..max); value = number of loci seen at that depth.
    final long[] HistogramArray = new long[max + 1];
    // Index = base quality. NOTE(review): sized Byte.MAX_VALUE (127), so a base
    // quality of exactly 127 would throw ArrayIndexOutOfBoundsException — confirm
    // qualities are always < 127 upstream.
    final long[] baseQHistogramArray = new long[Byte.MAX_VALUE];
    final boolean usingStopAfter = STOP_AFTER > 0;
    final long stopAfter = STOP_AFTER - 1; // "++counter > stopAfter" stops after STOP_AFTER loci
    long counter = 0;

    long basesExcludedByBaseq = 0;
    long basesExcludedByOverlap = 0;
    long basesExcludedByCapping = 0;

    // Loop through all the loci
    while (iterator.hasNext()) {
        final SamLocusIterator.LocusInfo info = iterator.next();

        // Check that the reference is not N
        final ReferenceSequence ref = refWalker.get(info.getSequenceIndex());
        final byte base = ref.getBases()[info.getPosition() - 1];
        if (base == 'N') continue;

        // Figure out the coverage while not counting overlapping reads twice, and
        // excluding various things
        final HashSet<String> readNames = new HashSet<String>(info.getRecordAndPositions().size());
        int pileupSize = 0;
        for (final SamLocusIterator.RecordAndOffset recs : info.getRecordAndPositions()) {
            if (recs.getBaseQuality() < MINIMUM_BASE_QUALITY) {
                ++basesExcludedByBaseq;
                continue;
            }
            // A repeated read name at this locus means overlapping segments of the
            // same read/pair — count the base once only.
            if (!readNames.add(recs.getRecord().getReadName())) {
                ++basesExcludedByOverlap;
                continue;
            }
            pileupSize++;
            // Only the first `max` usable bases contribute to the base-quality histogram.
            if (pileupSize <= max) {
                baseQHistogramArray[recs.getRecord().getBaseQualities()[recs.getOffset()]]++;
            }
        }

        // Cap the recorded depth at COVERAGE_CAP and remember how many bases the cap discarded.
        final int depth = Math.min(readNames.size(), max);
        if (depth < readNames.size()) basesExcludedByCapping += readNames.size() - max;
        HistogramArray[depth]++;

        // Record progress and perhaps stop
        progress.record(info.getSequenceName(), info.getPosition());
        if (usingStopAfter && ++counter > stopAfter) break;
    }

    // Construct and write the outputs
    final Histogram<Integer> histo = new Histogram<Integer>("coverage", "count");
    for (int i = 0; i < HistogramArray.length; ++i) {
        histo.increment(i, HistogramArray[i]);
    }

    final Histogram<Integer> baseQHisto = new Histogram<Integer>("value", "baseq_count");
    for (int i = 0; i < baseQHistogramArray.length; ++i) {
        baseQHisto.increment(i, baseQHistogramArray[i]);
    }

    final WgsMetrics metrics = generateWgsMetrics();
    metrics.GENOME_TERRITORY = (long) histo.getSumOfValues();
    metrics.MEAN_COVERAGE = histo.getMean();
    metrics.SD_COVERAGE = histo.getStandardDeviation();
    metrics.MEDIAN_COVERAGE = histo.getMedian();
    metrics.MAD_COVERAGE = histo.getMedianAbsoluteDeviation();

    final long basesExcludedByDupes = getBasesExcludedBy(dupeFilter);
    final long basesExcludedByMapq = getBasesExcludedBy(mapqFilter);
    final long basesExcludedByPairing = getBasesExcludedBy(pairFilter);

    // Exclusion fractions are relative to all bases seen, whether counted or excluded.
    final double total = histo.getSum();
    final double totalWithExcludes =
        total
            + basesExcludedByDupes
            + basesExcludedByMapq
            + basesExcludedByPairing
            + basesExcludedByBaseq
            + basesExcludedByOverlap
            + basesExcludedByCapping;
    metrics.PCT_EXC_DUPE = basesExcludedByDupes / totalWithExcludes;
    metrics.PCT_EXC_MAPQ = basesExcludedByMapq / totalWithExcludes;
    metrics.PCT_EXC_UNPAIRED = basesExcludedByPairing / totalWithExcludes;
    metrics.PCT_EXC_BASEQ = basesExcludedByBaseq / totalWithExcludes;
    metrics.PCT_EXC_OVERLAP = basesExcludedByOverlap / totalWithExcludes;
    metrics.PCT_EXC_CAPPED = basesExcludedByCapping / totalWithExcludes;
    metrics.PCT_EXC_TOTAL = (totalWithExcludes - total) / totalWithExcludes;

    // Fraction of the genome territory covered to at least each depth threshold
    // (tail sum of the depth histogram from the threshold upward).
    metrics.PCT_1X = MathUtil.sum(HistogramArray, 1, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_5X = MathUtil.sum(HistogramArray, 5, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_10X = MathUtil.sum(HistogramArray, 10, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_15X = MathUtil.sum(HistogramArray, 15, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_20X = MathUtil.sum(HistogramArray, 20, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_25X = MathUtil.sum(HistogramArray, 25, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_30X = MathUtil.sum(HistogramArray, 30, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_40X = MathUtil.sum(HistogramArray, 40, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_50X = MathUtil.sum(HistogramArray, 50, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_60X = MathUtil.sum(HistogramArray, 60, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_70X = MathUtil.sum(HistogramArray, 70, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_80X = MathUtil.sum(HistogramArray, 80, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_90X = MathUtil.sum(HistogramArray, 90, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
    metrics.PCT_100X = MathUtil.sum(HistogramArray, 100, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;

    final MetricsFile<WgsMetrics, Integer> out = getMetricsFile();
    out.addMetric(metrics);
    out.addHistogram(histo);
    if (INCLUDE_BQ_HISTOGRAM) {
        out.addHistogram(baseQHisto);
    }

    out.write(OUTPUT);
    return 0;
}
@Test
public void reportsHistogramValues() throws Exception {
    // Stub a histogram whose summary statistics and snapshot percentiles are all
    // distinct, so each announced value can be traced back to the accessor it came from.
    final Histogram histogram = mock(Histogram.class);
    when(histogram.getCount()).thenReturn(1L);
    when(histogram.getMax()).thenReturn(2L);
    when(histogram.getMean()).thenReturn(3.0);
    when(histogram.getMin()).thenReturn(4L);
    when(histogram.getStdDev()).thenReturn(5.0);

    final Snapshot snapshot = mock(Snapshot.class);
    when(snapshot.getMedian()).thenReturn(6.0);
    when(snapshot.get75thPercentile()).thenReturn(7.0);
    when(snapshot.get95thPercentile()).thenReturn(8.0);
    when(snapshot.get98thPercentile()).thenReturn(9.0);
    when(snapshot.get99thPercentile()).thenReturn(10.0);
    when(snapshot.get999thPercentile()).thenReturn(11.0);
    when(histogram.getSnapshot()).thenReturn(snapshot);

    reporter.report(
        this.<Gauge>map(),
        this.<Counter>map(),
        map("test.histogram", histogram),
        this.<Meter>map(),
        this.<Timer>map());

    // Every sub-metric is announced with the same type/slope/tmax/dmax/group, so
    // the expected name/value pairs can be verified in a loop. Separate verify
    // calls are order-independent, so this checks exactly what the individual
    // verifications would.
    final String[][] expectedAnnouncements = {
        {"test.histogram.count", "1"},
        {"test.histogram.max", "2"},
        {"test.histogram.mean", "3.0"},
        {"test.histogram.min", "4"},
        {"test.histogram.stddev", "5.0"},
        {"test.histogram.p50", "6.0"},
        {"test.histogram.p75", "7.0"},
        {"test.histogram.p95", "8.0"},
        {"test.histogram.p98", "9.0"},
        {"test.histogram.p99", "10.0"},
        {"test.histogram.p999", "11.0"}
    };
    for (final String[] expected : expectedAnnouncements) {
        verify(ganglia)
            .announce(
                expected[0], expected[1], GMetricType.DOUBLE, "", GMetricSlope.BOTH, 60, 0, "test");
    }
    verifyNoMoreInteractions(ganglia);
}