Example #1
0
  /**
   * Hard clips away soft clipped bases that are below the given quality threshold
   *
   * @param read the read
   * @param minQual the mininum base quality score to revert the base (inclusive)
   * @return a new read without low quality soft clipped bases
   */
  public static GATKSAMRecord hardClipLowQualitySoftClips(GATKSAMRecord read, byte minQual) {
    int nLeadingSoftClips = read.getAlignmentStart() - read.getSoftStart();
    if (read.isEmpty() || nLeadingSoftClips > read.getReadLength())
      return GATKSAMRecord.emptyRead(read);

    byte[] quals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);
    int left = -1;

    if (nLeadingSoftClips > 0) {
      for (int i = nLeadingSoftClips - 1; i >= 0; i--) {
        if (quals[i] >= minQual) left = i;
        else break;
      }
    }

    int right = -1;
    int nTailingSoftClips = read.getSoftEnd() - read.getAlignmentEnd();
    if (nTailingSoftClips > 0) {
      for (int i = read.getReadLength() - nTailingSoftClips; i < read.getReadLength(); i++) {
        if (quals[i] >= minQual) right = i;
        else break;
      }
    }

    GATKSAMRecord clippedRead = read;
    if (right >= 0
        && right + 1
            < clippedRead
                .getReadLength()) // only clip if there are softclipped bases (right >= 0) and the
                                  // first high quality soft clip is not the last base (right+1 <
                                  // readlength)
    clippedRead =
          hardClipByReadCoordinates(
              clippedRead,
              right + 1,
              clippedRead.getReadLength()
                  - 1); // first we hard clip the low quality soft clips on the right tail
    if (left >= 0
        && left - 1
            > 0) // only clip if there are softclipped bases (left >= 0) and the first high quality
                 // soft clip is not the last base (left-1 > 0)
    clippedRead =
          hardClipByReadCoordinates(
              clippedRead,
              0,
              left - 1); // then we hard clip the low quality soft clips on the left tail

    return clippedRead;
  }