/**
 * Collects calibration statistics for every (sample, sequence) pair into {@code local}.
 *
 * <p>Each sequence name in {@code sequenceLengths} is visited (only the restriction's sequence
 * when a restriction is supplied) and combined with each read group. When the calibrator's
 * covariates report {@code -1} for either the read group or the sequence, a
 * {@code CalibrationStats} constructed with a {@code null} argument is added for the pair;
 * otherwise the calibrator is queried for that read group and sequence and the results are
 * handed to a {@code LocalStatsProcessor} bound to {@code local}.
 *
 * @param calibrator source of covariates and statistics
 * @param sequenceLengths map keyed by sequence name; only the keys are used here
 * @param readGroupToSampleId read group to sample name
 * @param local destination for the gathered statistics
 * @param restriction limits processing to a single sequence, may be null
 */
private static void findIndividualPerSequenceCoverage(
     Calibrator calibrator,
     Map<String, Integer> sequenceLengths,
     Map<String, String> readGroupToSampleId,
     final Map<String, HashMap<String, CalibrationStats>> local,
     RegionRestriction restriction) {
   final int readGroupIndex = calibrator.getCovariateIndex(CovariateEnum.READGROUP);
   final Covariate readGroups = calibrator.getCovariate(readGroupIndex);
   final int sequenceIndex = calibrator.getCovariateIndex(CovariateEnum.SEQUENCE);
   final Covariate sequences = calibrator.getCovariate(sequenceIndex);
   for (final String sequenceName : sequenceLengths.keySet()) {
     final boolean selected =
         restriction == null || sequenceName.equals(restriction.getSequenceName());
     if (selected) {
       for (final Map.Entry<String, String> mapping : readGroupToSampleId.entrySet()) {
         final String sampleName = mapping.getValue();
         final int readGroupId = readGroups.valueOf(mapping.getKey());
         final int sequenceId = sequences.valueOf(sequenceName);
         if (readGroupId != -1 && sequenceId != -1) {
           // Both covariate values are known: restrict the query to this pair.
           final Calibrator.QuerySpec query = calibrator.initQuery();
           query.setValue(CovariateEnum.READGROUP, readGroupId);
           query.setValue(CovariateEnum.SEQUENCE, sequenceId);
           calibrator.processStats(new LocalStatsProcessor(local, sampleName, sequenceName), query);
         } else {
           // Unknown read group or sequence: still register the pair.
           add(local, sampleName, sequenceName, new CalibrationStats(null));
         }
       }
     }
   }
 }
Beispiel #2
0
 /**
  * Opens {@code input} as a block compressed stream and, when the index yields file pointers
  * for {@code region}, seeks the position reader to the first of them.
  *
  * <p>A VCF-specific position reader is used when the index options declare the VCF format;
  * otherwise a generic position reader configured from the index options is used.
  *
  * <p>NOTE(review): if a step after opening the stream throws, the stream does not appear to be
  * closed here - confirm whether callers or the reader types handle that.
  *
  * @param input the block compressed file to read
  * @param tir index reader providing the format options and the file pointers for the region
  * @param region the region to read, must not be null
  * @throws IOException declared for the underlying stream and seek operations
  * @throws NullPointerException if {@code region} is null
  */
 public SingleRestrictionLineReader(File input, TabixIndexReader tir, RegionRestriction region)
     throws IOException {
   if (region == null) {
     throw new NullPointerException();
   }
   mSequence = region.getSequenceName();
   mBeg = region.getStart();
   mEnd = region.getEnd();
   final BlockCompressedInputStream compressedStream = new BlockCompressedInputStream(input);
   final BlockCompressedLineReader lineReader = new BlockCompressedLineReader(compressedStream);
   if (tir.getOptions().mFormat == TabixIndexer.TabixOptions.FORMAT_VCF) {
     mBCPositionReader = new VcfPositionReader(lineReader, tir.getOptions().mSkip);
   } else {
     mBCPositionReader = new GenericPositionReader(lineReader, tir.getOptions());
   }
   mRange = tir.getFilePointers(region);
   if (mRange != null) {
     // Jump straight to the first file pointer reported for the region.
     mBCPositionReader.seek(mRange.start(0));
   }
 }
Beispiel #3
0
  /**
   * Builds the final table of reference sequences and attaches duplicate regions to them.
   *
   * <p>Every name in {@code names} is given an entry, in iteration order: the explicitly parsed
   * entry when one exists, otherwise a new entry built from the parse defaults. If a default is
   * needed but no default ploidy was parsed, an error is reported through {@code parse} and
   * processing stops. Duplicates are then added to both sequences they involve, skipping any
   * duplicate that refers to a sequence not present in the table.
   *
   * @param names the ordered set of reference sequence names
   * @param parse results from parsing.
   */
  private void postProcessing(Map<String, Integer> names, final ReferenceParse parse) {
    mReferences = new LinkedHashMap<>();
    // Populate one entry per sequence, preferring explicit declarations over defaults.
    for (final Map.Entry<String, Integer> sequence : names.entrySet()) {
      final String name = sequence.getKey();
      if (parse.mReferences.containsKey(name)) {
        // An explicit seq line was parsed for this name, so reuse it as is.
        mReferences.put(name, parse.mReferences.get(name));
      } else if (parse.mPloidyDefault == null) {
        parse.error("No default specified but required for sequence:" + name);
        return;
      } else {
        try {
          final ReferenceSequence byDefault =
              new ReferenceSequence(
                  false,
                  parse.mLinearDefault,
                  parse.mPloidyDefault,
                  name,
                  null,
                  sequence.getValue());
          mReferences.put(name, byDefault);
        } catch (final IllegalArgumentException e) {
          throw new NoTalkbackSlimException("Invalid reference file. " + e.getMessage());
        }
      }
    }

    // Link each duplicate into both of the sequences it spans, when both are known.
    for (final Pair<RegionRestriction, RegionRestriction> duplicate : parse.mDuplicates) {
      final RegionRestriction first = duplicate.getA();
      if (mReferences.containsKey(first.getSequenceName())) {
        final RegionRestriction second = duplicate.getB();
        if (mReferences.containsKey(second.getSequenceName())) {
          try {
            mReferences.get(first.getSequenceName()).addDuplicate(duplicate);
            mReferences.get(second.getSequenceName()).addDuplicate(duplicate);
          } catch (final IllegalArgumentException e) {
            throw new NoTalkbackSlimException("Invalid reference file. " + e.getMessage());
          }
        }
      }
    }
  }
 /**
  * Computes, from calibration data, the coverage of each sample on each sequence together with
  * the per-sequence sum over all samples.
  *
  * <p>When the calibrator has no sequence covariate, one genome-wide coverage is computed per
  * sample and assigned to every sequence. Otherwise coverage is computed separately for each
  * sequence, limited to the restriction's sequence when a restriction is given.
  *
  * @param calibrator the mapping calibration stats to use when computing coverage
  * @param defaultSequenceLengths map of sequence names to sequence length, used if not already set
  *     in the calibrator
  * @param readGroupToSampleId read group to sample id
  * @param restriction a region restriction, may be null; only consulted when the calibration data
  *     is separated per sequence
  */
 public CalibratedPerSequenceExpectedCoverage(
     Calibrator calibrator,
     Map<String, Integer> defaultSequenceLengths,
     Map<String, String> readGroupToSampleId,
     RegionRestriction restriction) {
   mSequenceSampleCoverages = new HashMap<>();
   mSumCoverages = new HashMap<>();
   mSamples = Collections.unmodifiableSet(new HashSet<>(readGroupToSampleId.values()));
   final Map<String, Integer> sequenceLengths =
       calibrator.hasLengths() ? calibrator.getSequenceLengths() : defaultSequenceLengths;
   if (calibrator.getCovariateIndex(CovariateEnum.SEQUENCE) == -1) {
     // No per sequence separation in calibration data, calculate per-genome coverage level
     computeGenomeWideCoverage(calibrator, sequenceLengths, readGroupToSampleId);
   } else {
     // Per-sequence separation is in calibration data, calculate per-sequence coverage level
     computePerSequenceCoverage(calibrator, sequenceLengths, readGroupToSampleId, restriction);
   }
 }

 /** Computes one genome-wide coverage per sample and assigns it to every sequence. */
 private void computeGenomeWideCoverage(
     Calibrator calibrator,
     Map<String, Integer> sequenceLengths,
     Map<String, String> readGroupToSampleId) {
   // Accumulate in a long: the total may exceed the int range of a single sequence length.
   long genomeLength = 0;
   for (final Integer sequenceLength : sequenceLengths.values()) {
     genomeLength += sequenceLength;
   }
   final Map<String, HashMap<String, CalibrationStats>> local = new HashMap<>();
   findIndividualGlobalCoverage(calibrator, readGroupToSampleId, local);
   double combinedCoverage = 0;
   for (final Map.Entry<String, HashMap<String, CalibrationStats>> e : local.entrySet()) {
     final HashMap<String, CalibrationStats> statsBySequence = e.getValue();
     if (!statsBySequence.containsKey(DUMMY_SEQ)) {
       continue;
     }
     // Guard against an empty genome to avoid dividing by zero.
     final double coverage =
         (genomeLength == 0)
             ? 0
             : (double) statsBySequence.get(DUMMY_SEQ).getTotalLength() / genomeLength;
     final String sampleName = e.getKey();
     Diagnostic.userLog(
         "Average coverage for sample " + sampleName + " is " + Utils.realFormat(coverage, 2));
     for (final String seqName : sequenceLengths.keySet()) {
       recordSampleCoverage(seqName, sampleName, coverage);
     }
     combinedCoverage += coverage;
   }
   Diagnostic.userLog("Average combined coverage is " + Utils.realFormat(combinedCoverage, 2));
   for (final String seqName : sequenceLengths.keySet()) {
     mSumCoverages.put(seqName, combinedCoverage);
   }
 }

 /** Computes coverage per sample for each (optionally restricted) sequence separately. */
 private void computePerSequenceCoverage(
     Calibrator calibrator,
     Map<String, Integer> sequenceLengths,
     Map<String, String> readGroupToSampleId,
     RegionRestriction restriction) {
   final Map<String, HashMap<String, CalibrationStats>> local = new HashMap<>();
   findIndividualPerSequenceCoverage(
       calibrator, sequenceLengths, readGroupToSampleId, local, restriction);
   for (final Map.Entry<String, Integer> entry : sequenceLengths.entrySet()) {
     final String seqName = entry.getKey();
     if (restriction != null && !seqName.equals(restriction.getSequenceName())) {
       continue;
     }
     final int seqLength = entry.getValue();
     double combinedCoverage = 0;
     for (final Map.Entry<String, HashMap<String, CalibrationStats>> e : local.entrySet()) {
       final HashMap<String, CalibrationStats> statsBySequence = e.getValue();
       if (!statsBySequence.containsKey(seqName)) {
         continue;
       }
       // Guard against a zero-length sequence to avoid dividing by zero.
       final double coverage =
           (seqLength == 0)
               ? 0
               : (double) statsBySequence.get(seqName).getTotalLength() / seqLength;
       final String sampleName = e.getKey();
       Diagnostic.userLog(
           "Average coverage across sequence "
               + seqName
               + " for sample "
               + sampleName
               + " is "
               + Utils.realFormat(coverage, 2));
       recordSampleCoverage(seqName, sampleName, coverage);
       combinedCoverage += coverage;
     }
     Diagnostic.userLog(
         "Average combined coverage for sequence "
             + seqName
             + " is "
             + Utils.realFormat(combinedCoverage, 2));
     mSumCoverages.put(seqName, combinedCoverage);
   }
 }

 /** Stores the coverage of a sample on a sequence, creating the per-sequence map on first use. */
 private void recordSampleCoverage(String seqName, String sampleName, double coverage) {
   Map<String, Double> samples = mSequenceSampleCoverages.get(seqName);
   if (samples == null) {
     samples = new HashMap<>();
     mSequenceSampleCoverages.put(seqName, samples);
   }
   samples.put(sampleName, coverage);
 }