Ejemplo n.º 1
0
  /**
   * Returns the duplicate score computed from the given fragment. value should be capped by
   * Short.MAX_VALUE/2 since the score from two reads will be added and an overflow will be
   *
   * <p>If true is given to assumeMateCigar, then any score that can use the mate cigar to compute
   * the mate's score will return the score computed on both ends.
   */
  public static short computeDuplicateScore(
      final SAMRecord record,
      final ScoringStrategy scoringStrategy,
      final boolean assumeMateCigar) {
    Short storedScore = (Short) record.getTransientAttribute(Attr.DuplicateScore);

    if (storedScore == null) {
      short score = 0;
      switch (scoringStrategy) {
        case SUM_OF_BASE_QUALITIES:
          // two (very) long reads worth of high-quality bases can go over Short.MAX_VALUE/2
          // and risk overflow.
          score += (short) Math.min(getSumOfBaseQualities(record), Short.MAX_VALUE / 2);
          break;
        case TOTAL_MAPPED_REFERENCE_LENGTH:
          if (!record.getReadUnmappedFlag()) {
            // no need to remember the score since this scoring mechanism is symmetric
            score = (short) Math.min(record.getCigar().getReferenceLength(), Short.MAX_VALUE / 2);
          }
          if (assumeMateCigar && record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
            score +=
                (short)
                    Math.min(
                        SAMUtils.getMateCigar(record).getReferenceLength(), Short.MAX_VALUE / 2);
          }
          break;
          // The RANDOM score gives the same score to both reads so that they get filtered together.
          // it's not critical do use the readName since the scores from both ends get added, but it
          // seem
          // to be clearer this way.
        case RANDOM:
          // start with a random number between Short.MIN_VALUE/4 and Short.MAX_VALUE/4
          score += (short) (hasher.hashUnencodedChars(record.getReadName()) & 0b11_1111_1111_1111);
          // subtract Short.MIN_VALUE/4 from it to end up with a number between
          // 0 and Short.MAX_VALUE/2. This number can be then discounted in case the read is
          // not passing filters. We need to stay far from overflow so that when we add the two
          // scores from the two read mates we do not overflow since that could cause us to chose a
          // failing read-pair instead of a passing one.
          score -= Short.MIN_VALUE / 4;
      }

      // make sure that filter-failing records are heavily discounted. (the discount can happen
      // twice, once
      // for each mate, so need to make sure we do not subtract more than Short.MIN_VALUE overall.)
      score += record.getReadFailsVendorQualityCheckFlag() ? (short) (Short.MIN_VALUE / 2) : 0;

      storedScore = score;
      record.setTransientAttribute(Attr.DuplicateScore, storedScore);
    }

    return storedScore;
  }
      private void collectReadData(final SAMRecord record, final ReferenceSequence ref) {
        metrics.TOTAL_READS++;
        readLengthHistogram.increment(record.getReadBases().length);

        if (!record.getReadFailsVendorQualityCheckFlag()) {
          metrics.PF_READS++;
          if (isNoiseRead(record)) metrics.PF_NOISE_READS++;

          if (record.getReadUnmappedFlag()) {
            // If the read is unmapped see if it's adapter sequence
            final byte[] readBases = record.getReadBases();
            if (!(record instanceof BAMRecord)) StringUtil.toUpperCase(readBases);

            if (isAdapterSequence(readBases)) {
              this.adapterReads++;
            }
          } else if (doRefMetrics) {
            metrics.PF_READS_ALIGNED++;
            if (!record.getReadNegativeStrandFlag()) numPositiveStrand++;

            if (record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
              metrics.READS_ALIGNED_IN_PAIRS++;

              // Check that both ends have mapq > minimum
              final Integer mateMq = record.getIntegerAttribute("MQ");
              if (mateMq == null
                  || mateMq >= MAPPING_QUALITY_THRESOLD
                      && record.getMappingQuality() >= MAPPING_QUALITY_THRESOLD) {
                ++this.chimerasDenominator;

                // With both reads mapped we can see if this pair is chimeric
                if (Math.abs(record.getInferredInsertSize()) > maxInsertSize
                    || !record.getReferenceIndex().equals(record.getMateReferenceIndex())) {
                  ++this.chimeras;
                }
              }
            }
          }
        }
      }
 private boolean isHighQualityMapping(final SAMRecord record) {
   return !record.getReadFailsVendorQualityCheckFlag()
       && record.getMappingQuality() >= MAPPING_QUALITY_THRESOLD;
 }
      private void collectQualityData(final SAMRecord record, final ReferenceSequence reference) {
        // If the read isnt an aligned PF read then look at the read for no-calls
        if (record.getReadUnmappedFlag()
            || record.getReadFailsVendorQualityCheckFlag()
            || !doRefMetrics) {
          final byte[] readBases = record.getReadBases();
          for (int i = 0; i < readBases.length; i++) {
            if (SequenceUtil.isNoCall(readBases[i])) {
              badCycleHistogram.increment(
                  CoordMath.getCycle(record.getReadNegativeStrandFlag(), readBases.length, i));
            }
          }
        } else if (!record.getReadFailsVendorQualityCheckFlag()) {
          final boolean highQualityMapping = isHighQualityMapping(record);
          if (highQualityMapping) metrics.PF_HQ_ALIGNED_READS++;

          final byte[] readBases = record.getReadBases();
          final byte[] refBases = reference.getBases();
          final byte[] qualities = record.getBaseQualities();
          final int refLength = refBases.length;
          long mismatchCount = 0;
          long hqMismatchCount = 0;

          for (final AlignmentBlock alignmentBlock : record.getAlignmentBlocks()) {
            final int readIndex = alignmentBlock.getReadStart() - 1;
            final int refIndex = alignmentBlock.getReferenceStart() - 1;
            final int length = alignmentBlock.getLength();

            for (int i = 0; i < length && refIndex + i < refLength; ++i) {
              final int readBaseIndex = readIndex + i;
              boolean mismatch =
                  !SequenceUtil.basesEqual(readBases[readBaseIndex], refBases[refIndex + i]);
              boolean bisulfiteBase = false;
              if (mismatch && isBisulfiteSequenced) {
                if ((record.getReadNegativeStrandFlag()
                        && (refBases[refIndex + i] == 'G' || refBases[refIndex + i] == 'g')
                        && (readBases[readBaseIndex] == 'A' || readBases[readBaseIndex] == 'a'))
                    || ((!record.getReadNegativeStrandFlag())
                            && (refBases[refIndex + i] == 'C' || refBases[refIndex + i] == 'c')
                            && (readBases[readBaseIndex] == 'T')
                        || readBases[readBaseIndex] == 't')) {

                  bisulfiteBase = true;
                  mismatch = false;
                }
              }

              if (mismatch) mismatchCount++;

              metrics.PF_ALIGNED_BASES++;
              if (!bisulfiteBase) nonBisulfiteAlignedBases++;

              if (highQualityMapping) {
                metrics.PF_HQ_ALIGNED_BASES++;
                if (!bisulfiteBase) hqNonBisulfiteAlignedBases++;
                if (qualities[readBaseIndex] >= BASE_QUALITY_THRESHOLD)
                  metrics.PF_HQ_ALIGNED_Q20_BASES++;
                if (mismatch) hqMismatchCount++;
              }

              if (mismatch || SequenceUtil.isNoCall(readBases[readBaseIndex])) {
                badCycleHistogram.increment(
                    CoordMath.getCycle(record.getReadNegativeStrandFlag(), readBases.length, i));
              }
            }
          }

          mismatchHistogram.increment(mismatchCount);
          hqMismatchHistogram.increment(hqMismatchCount);

          // Add any insertions and/or deletions to the global count
          for (final CigarElement elem : record.getCigar().getCigarElements()) {
            final CigarOperator op = elem.getOperator();
            if (op == CigarOperator.INSERTION || op == CigarOperator.DELETION) ++this.indels;
          }
        }
      }