Ejemplo n.º 1
0
  /**
   * Compares two records so that the better duplicate-set representative orders first.
   *
   * <p>The ordering rules are applied in turn:
   *
   * <ol>
   *   <li>A paired record is always preferred over a non-paired one.
   *   <li>Otherwise the record with the higher {@code computeDuplicateScore} value is preferred.
   *   <li>If the scores are equal, the tie is broken by comparing the names returned by {@code
   *       SAMUtils.getCanonicalRecordName}, so that the outcome does not depend on the order in
   *       which the reads happen to appear.
   * </ol>
   *
   * <p>If true is given to assumeMateCigar, then any score that can use the mate cigar to compute
   * the mate's score will return the score computed on both ends.
   *
   * @param rec1 the first record
   * @param rec2 the second record
   * @param scoringStrategy the strategy passed on to {@code computeDuplicateScore}
   * @param assumeMateCigar passed on to {@code computeDuplicateScore}
   * @return a value &lt; 0 if rec1 is better than rec2, a value &gt; 0 if rec2 is better than rec1,
   *     and 0 only if neither rule above separates them
   */
  public static int compare(
      final SAMRecord rec1,
      final SAMRecord rec2,
      final ScoringStrategy scoringStrategy,
      final boolean assumeMateCigar) {
    // Always prefer paired over non-paired. "Preferred" means "orders first", so the paired
    // record has to compare as less than the non-paired one.
    final boolean rec1Paired = rec1.getReadPairedFlag();
    if (rec1Paired != rec2.getReadPairedFlag()) {
      return rec1Paired ? -1 : 1;
    }

    // Higher score is better, hence rec2 - rec1. The short operands are promoted to int before
    // the subtraction, so the difference cannot overflow.
    int cmp =
        computeDuplicateScore(rec2, scoringStrategy, assumeMateCigar)
            - computeDuplicateScore(rec1, scoringStrategy, assumeMateCigar);

    // Finally, fall back to the canonical record name. We cannot control the order in which reads
    // that are comparable up to now (i.e. cmp == 0) appear, and we want to choose between them
    // deterministically.
    if (cmp == 0) {
      cmp = SAMUtils.getCanonicalRecordName(rec1).compareTo(SAMUtils.getCanonicalRecordName(rec2));
    }

    return cmp;
  }
Ejemplo n.º 2
0
  /**
   * Returns the duplicate score computed from the given fragment.
   *
   * <p>The contribution of a single read is capped by Short.MAX_VALUE/2, since the scores from two
   * reads will be added and that sum must not overflow a short.
   *
   * <p>If true is given to assumeMateCigar, then any score that can use the mate cigar to compute
   * the mate's score will return the score computed on both ends.
   *
   * <p>The computed score is stored on the record as the transient attribute {@code
   * Attr.DuplicateScore}. When that attribute is already present it is returned as is, whatever
   * {@code scoringStrategy} and {@code assumeMateCigar} are passed to this call.
   *
   * @param record the record to score; its transient attributes are updated on first use
   * @param scoringStrategy the strategy used to compute the score when none is stored yet
   * @param assumeMateCigar whether the mate cigar may be used to add the mate's contribution
   * @return the stored or newly computed score
   */
  public static short computeDuplicateScore(
      final SAMRecord record,
      final ScoringStrategy scoringStrategy,
      final boolean assumeMateCigar) {
    Short storedScore = (Short) record.getTransientAttribute(Attr.DuplicateScore);

    if (storedScore == null) {
      short score = 0;
      switch (scoringStrategy) {
        case SUM_OF_BASE_QUALITIES:
          // two (very) long reads worth of high-quality bases can go over Short.MAX_VALUE/2
          // and risk overflow.
          score += (short) Math.min(getSumOfBaseQualities(record), Short.MAX_VALUE / 2);
          break;
        case TOTAL_MAPPED_REFERENCE_LENGTH:
          if (!record.getReadUnmappedFlag()) {
            // no need to remember the score since this scoring mechanism is symmetric
            score = (short) Math.min(record.getCigar().getReferenceLength(), Short.MAX_VALUE / 2);
          }
          if (assumeMateCigar && record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
            // NOTE(review): this dereferences the result of SAMUtils.getMateCigar directly;
            // confirm it cannot be null for the records passed in with assumeMateCigar == true.
            score +=
                (short)
                    Math.min(
                        SAMUtils.getMateCigar(record).getReferenceLength(), Short.MAX_VALUE / 2);
          }
          break;
        case RANDOM:
          // The RANDOM score is derived from the read name, so both reads of a pair get the same
          // score and are filtered together.
          //
          // Keeping only the low 14 bits of the hash yields a number between 0 and
          // Short.MAX_VALUE/2 (16383). We need to stay that far from overflow so that adding the
          // scores of the two read mates cannot wrap around, since that could cause us to choose
          // a failing read-pair instead of a passing one. No further offset is applied: shifting
          // the value upwards would push it past Short.MAX_VALUE/2 and break that guarantee.
          score += (short) (hasher.hashUnencodedChars(record.getReadName()) & 0b11_1111_1111_1111);
          break;
      }

      // make sure that filter-failing records are heavily discounted. (the discount can happen
      // twice, once for each mate, so need to make sure we do not subtract more than
      // Short.MIN_VALUE overall.)
      score += record.getReadFailsVendorQualityCheckFlag() ? (short) (Short.MIN_VALUE / 2) : 0;

      storedScore = score;
      record.setTransientAttribute(Attr.DuplicateScore, storedScore);
    }

    return storedScore;
  }
    /**
     * Routes one record, together with its reference sequence, to the matching collectors.
     *
     * <p>A non-paired record goes to the unpaired collector only. A paired record goes to the
     * first-of-pair or second-of-pair collector, depending on its first-of-pair flag, and then to
     * the pair collector as well.
     *
     * @param args holder supplying the SAM record and its reference sequence
     */
    public void acceptRecord(final SAMRecordAndReference args) {
      final SAMRecord samRecord = args.getSamRecord();
      final ReferenceSequence reference = args.getReferenceSequence();

      // Non-paired reads are counted once, by the unpaired collector.
      if (!samRecord.getReadPairedFlag()) {
        unpairedCollector.addRecord(samRecord, reference);
        return;
      }

      // Paired reads are counted by the collector for their own end ...
      if (samRecord.getFirstOfPairFlag()) {
        firstOfPairCollector.addRecord(samRecord, reference);
      } else {
        secondOfPairCollector.addRecord(samRecord, reference);
      }

      // ... and by the collector covering both ends.
      pairCollector.addRecord(samRecord, reference);
    }
Ejemplo n.º 4
0
Archivo: Align.java Proyecto: nh13/SRMA
  /**
   * Clears the mate-related fields of a paired record; a non-paired record is left untouched.
   *
   * <p>The proper-pair, mate-unmapped and mate-negative-strand flags are set to false, the mate
   * reference index is set to {@code SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX}, and the mate
   * alignment start and inferred insert size are set to 0. The read-paired flag itself is not
   * modified by this method.
   *
   * @param rec the record to update in place
   */
  private static void removeMateInfo(SAMRecord rec) {
    if (!rec.getReadPairedFlag()) {
      // Nothing to remove for a record that is not paired.
      return;
    }

    // Flags describing the mate.
    rec.setProperPairFlag(false); // no longer part of a proper pair
    rec.setMateUnmappedFlag(false);
    rec.setMateNegativeStrandFlag(false);

    // Fields describing where the mate aligns.
    rec.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
    rec.setMateAlignmentStart(0);
    rec.setInferredInsertSize(0);

    // TODO: remove tags and values that are mate pair inclined.
  }
      /**
       * Updates the read-level counters for one record.
       *
       * <p>Every record increments TOTAL_READS and the read-length histogram. Records that fail
       * the vendor quality check contribute nothing further. For the remaining records, unmapped
       * reads are checked for adapter sequence, while mapped reads are only examined further when
       * {@code doRefMetrics} is set: they update the aligned, strand, aligned-in-pairs and chimera
       * counters.
       *
       * @param record the record to account for
       * @param ref the reference sequence for the record; not used by this method
       */
      private void collectReadData(final SAMRecord record, final ReferenceSequence ref) {
        metrics.TOTAL_READS++;
        readLengthHistogram.increment(record.getReadBases().length);

        // Reads failing the vendor quality check only count towards the totals above.
        if (record.getReadFailsVendorQualityCheckFlag()) {
          return;
        }

        metrics.PF_READS++;
        if (isNoiseRead(record)) {
          metrics.PF_NOISE_READS++;
        }

        if (record.getReadUnmappedFlag()) {
          // If the read is unmapped see if it's adapter sequence.
          final byte[] bases = record.getReadBases();
          // NOTE(review): bases are upper-cased in place unless the record is a BAMRecord;
          // presumably BAMRecord bases are already upper case - confirm.
          if (!(record instanceof BAMRecord)) {
            StringUtil.toUpperCase(bases);
          }

          if (isAdapterSequence(bases)) {
            this.adapterReads++;
          }
          return;
        }

        // Alignment-based counters are only maintained when reference metrics are enabled.
        if (!doRefMetrics) {
          return;
        }

        metrics.PF_READS_ALIGNED++;
        if (!record.getReadNegativeStrandFlag()) {
          numPositiveStrand++;
        }

        final boolean bothEndsMapped = record.getReadPairedFlag() && !record.getMateUnmappedFlag();
        if (!bothEndsMapped) {
          return;
        }
        metrics.READS_ALIGNED_IN_PAIRS++;

        // Check the mapping qualities against the minimum. When the mate's "MQ" attribute is
        // absent the pair is accepted without looking at this read's own mapping quality.
        // NOTE(review): confirm that skipping the read's own mapq in that case is intended.
        final Integer mateMapq = record.getIntegerAttribute("MQ");
        final boolean mapqAccepted =
            mateMapq == null
                || (mateMapq >= MAPPING_QUALITY_THRESOLD
                    && record.getMappingQuality() >= MAPPING_QUALITY_THRESOLD);
        if (!mapqAccepted) {
          return;
        }
        ++this.chimerasDenominator;

        // With both reads mapped we can see if this pair is chimeric: either the insert is larger
        // than the allowed maximum or the two ends are on different references.
        final boolean insertTooLarge = Math.abs(record.getInferredInsertSize()) > maxInsertSize;
        if (insertTooLarge || !record.getReferenceIndex().equals(record.getMateReferenceIndex())) {
          ++this.chimeras;
        }
      }