/**
 * Gathers calibration statistics for every sample / sequence combination into {@code local}.
 *
 * <p>Each sequence name in {@code sequenceLengths} is visited (only the sequence named by
 * {@code restriction} when a restriction is supplied), and for each one every read group in
 * {@code readGroupToSampleId} is looked up in the calibrator's READGROUP covariate, and the
 * sequence name in its SEQUENCE covariate. When either lookup yields {@code -1}, a
 * {@code CalibrationStats} constructed with {@code null} is passed to {@code add} for that
 * sample and sequence. Otherwise the calibrator is queried with both covariate values and the
 * results are handed to a {@code LocalStatsProcessor} created for {@code local}, the sample and
 * the sequence.
 *
 * @param calibrator supplies the covariates and the statistics to process
 * @param sequenceLengths map keyed by sequence name; only the keys are used by this method
 * @param readGroupToSampleId read group to sample name
 * @param local map that receives the collected statistics
 * @param restriction a region restriction, may be null
 */
private static void findIndividualPerSequenceCoverage(Calibrator calibrator, Map<String, Integer> sequenceLengths, Map<String, String> readGroupToSampleId, final Map<String, HashMap<String, CalibrationStats>> local, RegionRestriction restriction) {
  final Covariate readGroupCovariate = calibrator.getCovariate(calibrator.getCovariateIndex(CovariateEnum.READGROUP));
  final Covariate sequenceCovariate = calibrator.getCovariate(calibrator.getCovariateIndex(CovariateEnum.SEQUENCE));
  for (final String sequenceName : sequenceLengths.keySet()) {
    // With a restriction in place, only the restricted sequence is of interest.
    final boolean selected = restriction == null || sequenceName.equals(restriction.getSequenceName());
    if (selected) {
      for (final Map.Entry<String, String> groupEntry : readGroupToSampleId.entrySet()) {
        final String sampleName = groupEntry.getValue();
        final int readGroupValue = readGroupCovariate.valueOf(groupEntry.getKey());
        final int sequenceValue = sequenceCovariate.valueOf(sequenceName);
        if (readGroupValue != -1 && sequenceValue != -1) {
          final Calibrator.QuerySpec query = calibrator.initQuery();
          query.setValue(CovariateEnum.READGROUP, readGroupValue);
          query.setValue(CovariateEnum.SEQUENCE, sequenceValue);
          calibrator.processStats(new LocalStatsProcessor(local, sampleName, sequenceName), query);
        } else {
          // A covariate lookup returned -1: register a placeholder for the pair instead of querying.
          add(local, sampleName, sequenceName, new CalibrationStats(null));
        }
      }
    }
  }
}
/**
 * Creates a reader over a block compressed file, positioned for a single region.
 *
 * <p>The index reader is asked for the file pointers covering {@code region}. When it returns a
 * range, the position reader is moved to the range's first start offset ({@code start(0)});
 * when it returns {@code null}, no seek is performed. The kind of position reader depends on the
 * format recorded in the index options: a {@code VcfPositionReader} for
 * {@code FORMAT_VCF}, otherwise a {@code GenericPositionReader}.
 *
 * @param input the block compressed file to read
 * @param tir index reader supplying the format options and the file pointers for the region
 * @param region the region to restrict to, must not be null
 * @throws IOException declared for the underlying stream / index / seek operations
 * @throws NullPointerException if {@code region} is null
 */
public SingleRestrictionLineReader(File input, TabixIndexReader tir, RegionRestriction region) throws IOException {
  if (region == null) {
    throw new NullPointerException("region must not be null");
  }
  mSequence = region.getSequenceName();
  mBeg = region.getStart();
  mEnd = region.getEnd();
  // Consult the index BEFORE opening the data file: a null index reader or a failing
  // lookup then aborts construction without leaving an open stream behind.
  mRange = tir.getFilePointers(region);
  final BlockCompressedLineReader lineReader = new BlockCompressedLineReader(new BlockCompressedInputStream(input));
  if (tir.getOptions().mFormat == TabixIndexer.TabixOptions.FORMAT_VCF) {
    mBCPositionReader = new VcfPositionReader(lineReader, tir.getOptions().mSkip);
  } else {
    mBCPositionReader = new GenericPositionReader(lineReader, tir.getOptions());
  }
  // NOTE(review): if reader construction or the seek below throws, the stream opened above is
  // not closed here; fixing that needs a close method on the reader, which is not visible from
  // this constructor - confirm and wrap accordingly.
  if (mRange != null) {
    mBCPositionReader.seek(mRange.start(0));
  }
}
/** * Finish constructing the references and link in duplicates. * * @param names the ordered set of reference sequence names * @param parse results from parsing. */ private void postProcessing(Map<String, Integer> names, final ReferenceParse parse) { mReferences = new LinkedHashMap<>(); // deal with sequences in genome but explicitly described. for (final Map.Entry<String, Integer> entry : names.entrySet()) { final String name = entry.getKey(); if (parse.mReferences.containsKey( name)) { // have seen explicitly declared seq line, so copy from parse mReferences.put(name, parse.mReferences.get(name)); continue; } if (parse.mPloidyDefault == null) { parse.error("No default specified but required for sequence:" + name); return; } try { mReferences.put( name, new ReferenceSequence( false, parse.mLinearDefault, parse.mPloidyDefault, name, null, entry.getValue())); } catch (final IllegalArgumentException e) { throw new NoTalkbackSlimException("Invalid reference file. " + e.getMessage()); } } // add duplicate information into sequence entries for (final Pair<RegionRestriction, RegionRestriction> duplicate : parse.mDuplicates) { final RegionRestriction r1 = duplicate.getA(); if (!mReferences.containsKey(r1.getSequenceName())) { continue; } final RegionRestriction r2 = duplicate.getB(); if (!mReferences.containsKey(r2.getSequenceName())) { continue; } try { mReferences.get(r1.getSequenceName()).addDuplicate(duplicate); mReferences.get(r2.getSequenceName()).addDuplicate(duplicate); } catch (final IllegalArgumentException e) { throw new NoTalkbackSlimException("Invalid reference file. " + e.getMessage()); } } }
/**
 * Computes expected coverage for each sequence and sample from mapping calibration statistics.
 *
 * <p>Sequence lengths come from the calibrator when it has them, otherwise from
 * {@code defaultSequenceLengths}. If the calibrator has no SEQUENCE covariate, a single
 * genome-wide coverage per sample is computed and recorded against every sequence; otherwise
 * coverage is computed separately for each sequence (only the restricted sequence when
 * {@code restriction} is non-null). A zero length yields a coverage of zero.
 *
 * @param calibrator the mapping calibration stats to use when computing coverage
 * @param defaultSequenceLengths map of sequence names to sequence length, used if not already set
 *     in the calibrator
 * @param readGroupToSampleId read group to sample id
 * @param restriction a region restriction, may be null
 */
public CalibratedPerSequenceExpectedCoverage(Calibrator calibrator, Map<String, Integer> defaultSequenceLengths, Map<String, String> readGroupToSampleId, RegionRestriction restriction) {
  mSequenceSampleCoverages = new HashMap<>();
  mSumCoverages = new HashMap<>();
  mSamples = Collections.unmodifiableSet(new HashSet<>(readGroupToSampleId.values()));
  final Map<String, Integer> sequenceLengths = calibrator.hasLengths() ? calibrator.getSequenceLengths() : defaultSequenceLengths;
  if (calibrator.getCovariateIndex(CovariateEnum.SEQUENCE) == -1) {
    // No per sequence separation in calibration data, calculate per-genome coverage level
    computeGenomeWideCoverage(calibrator, sequenceLengths, readGroupToSampleId);
  } else {
    // Per-sequence separation is in calibration data, calculate per-sequence coverage level
    computePerSequenceCoverage(calibrator, sequenceLengths, readGroupToSampleId, restriction);
  }
}

/** Computes one coverage value per sample over the summed sequence lengths and records it for every sequence. */
private void computeGenomeWideCoverage(Calibrator calibrator, Map<String, Integer> sequenceLengths, Map<String, String> readGroupToSampleId) {
  long genomeLength = 0;
  for (final int sequenceLength : sequenceLengths.values()) {
    genomeLength += sequenceLength;
  }
  final Map<String, HashMap<String, CalibrationStats>> local = new HashMap<>();
  findIndividualGlobalCoverage(calibrator, readGroupToSampleId, local);
  double combinedCoverage = 0;
  for (final Map.Entry<String, HashMap<String, CalibrationStats>> sampleEntry : local.entrySet()) {
    final HashMap<String, CalibrationStats> statsBySequence = sampleEntry.getValue();
    if (statsBySequence.containsKey(DUMMY_SEQ)) {
      // Guard against an empty genome rather than dividing by zero.
      final double coverage = (genomeLength == 0) ? 0 : (double) statsBySequence.get(DUMMY_SEQ).getTotalLength() / genomeLength;
      final String sampleName = sampleEntry.getKey();
      Diagnostic.userLog("Average coverage for sample " + sampleName + " is " + Utils.realFormat(coverage, 2));
      for (final String seqName : sequenceLengths.keySet()) {
        recordSampleCoverage(seqName, sampleName, coverage);
      }
      combinedCoverage += coverage;
    }
  }
  Diagnostic.userLog("Average combined coverage is " + Utils.realFormat(combinedCoverage, 2));
  for (final String seqName : sequenceLengths.keySet()) {
    mSumCoverages.put(seqName, combinedCoverage);
  }
}

/** Computes coverage per sample for each (non-restricted-out) sequence and records the per-sequence sum. */
private void computePerSequenceCoverage(Calibrator calibrator, Map<String, Integer> sequenceLengths, Map<String, String> readGroupToSampleId, RegionRestriction restriction) {
  final Map<String, HashMap<String, CalibrationStats>> local = new HashMap<>();
  findIndividualPerSequenceCoverage(calibrator, sequenceLengths, readGroupToSampleId, local, restriction);
  for (final Map.Entry<String, Integer> lengthEntry : sequenceLengths.entrySet()) {
    final String seqName = lengthEntry.getKey();
    if (restriction != null && !seqName.equals(restriction.getSequenceName())) {
      continue;
    }
    final int seqLength = lengthEntry.getValue();
    double combinedCoverage = 0;
    for (final Map.Entry<String, HashMap<String, CalibrationStats>> sampleEntry : local.entrySet()) {
      final HashMap<String, CalibrationStats> statsBySequence = sampleEntry.getValue();
      if (statsBySequence.containsKey(seqName)) {
        // Guard against a zero-length sequence rather than dividing by zero.
        final double coverage = (seqLength == 0) ? 0 : (double) statsBySequence.get(seqName).getTotalLength() / seqLength;
        final String sampleName = sampleEntry.getKey();
        Diagnostic.userLog("Average coverage across sequence " + seqName + " for sample " + sampleName + " is " + Utils.realFormat(coverage, 2));
        recordSampleCoverage(seqName, sampleName, coverage);
        combinedCoverage += coverage;
      }
    }
    Diagnostic.userLog("Average combined coverage for sequence " + seqName + " is " + Utils.realFormat(combinedCoverage, 2));
    mSumCoverages.put(seqName, combinedCoverage);
  }
}

/** Stores a sample's coverage for a sequence, creating the per-sequence map on first use. */
private void recordSampleCoverage(String seqName, String sampleName, double coverage) {
  Map<String, Double> sampleCoverages = mSequenceSampleCoverages.get(seqName);
  if (sampleCoverages == null) {
    sampleCoverages = new HashMap<>();
    mSequenceSampleCoverages.put(seqName, sampleCoverages);
  }
  sampleCoverages.put(sampleName, coverage);
}