Example #1
0
  /**
   * Hard clips every soft-clipped base of the read, on both tails.
   *
   * <p>The CIGAR is walked once to locate the soft-clip elements. A soft clip seen before any
   * element other than a hard clip belongs to the left tail; a soft clip seen afterwards belongs
   * to the right tail. The two resulting clipping operations are queued (right tail first) and
   * applied with {@code ClippingRepresentation.HARDCLIP_BASES}.
   *
   * @return the read itself if it is empty, otherwise the result of {@code clipRead} after the
   *     soft-clipped tails were queued for hard clipping
   */
  private GATKSAMRecord hardClipSoftClippedBases() {
    if (read.isEmpty()) {
      return read;
    }

    int basesSeen = 0; // read bases consumed by the CIGAR elements visited so far
    int lastLeftClipIndex = -1; // last read position of the left tail to hard clip (inclusive)
    int firstRightClipIndex = -1; // first read position of the right tail to hard clip (inclusive)
    boolean pastLeftTail = false; // flips once an element that is neither S nor H has been seen

    for (final CigarElement element : read.getCigar().getCigarElements()) {
      final CigarOperator operator = element.getOperator();
      final int length = element.getLength();

      if (operator == CigarOperator.SOFT_CLIP) {
        if (pastLeftTail) {
          firstRightClipIndex = basesSeen;
        } else {
          lastLeftClipIndex = basesSeen + length - 1;
        }
      } else if (operator != CigarOperator.HARD_CLIP) {
        pastLeftTail = true;
      }

      if (operator.consumesReadBases()) {
        basesSeen += length;
      }
    }

    // The right tail must be queued before the left tail: clipping the left tail first would
    // shift the read coordinates that the right-tail operation refers to.
    if (firstRightClipIndex >= 0) {
      addOp(new ClippingOp(firstRightClipIndex, read.getReadLength() - 1));
    }
    if (lastLeftClipIndex >= 0) {
      addOp(new ClippingOp(0, lastLeftClipIndex));
    }

    return clipRead(ClippingRepresentation.HARDCLIP_BASES);
  }
Example #2
0
  /**
   * Shared implementation for hard clipping one tail of the read by reference coordinates. It
   * backs hardClipByReferenceCoordinatesLeftTail and hardClipByReferenceCoordinatesRightTail and
   * is not meant to be called directly.
   *
   * <p>The tail to clip is selected by which argument is negative: exactly one of
   * {@code refStart} and {@code refStop} must be &lt; 0.
   *
   * <ul>
   *   <li>{@code refStart < 0}: clip the left tail, from read position 0 up to the read coordinate
   *       that {@code ReadUtils.getReadCoordinateForReferenceCoordinate} returns for
   *       {@code refStop}.
   *   <li>{@code refStop < 0}: clip the right tail, from the read coordinate returned for
   *       {@code refStart} up to the last base of the read.
   * </ul>
   *
   * @param refStart first base to clip (inclusive), or a negative value to clip the left tail
   * @param refStop last base to clip (inclusive), or a negative value to clip the right tail
   * @return the read itself if it is empty, otherwise a new read without the clipped bases
   * @throws ReviewedStingException if both or neither argument is negative, or if the computed
   *     read range is out of bounds, inverted, or strictly inside the read
   */
  @Requires({
    "!read.getReadUnmappedFlag()",
    "refStart < 0 || refStop < 0"
  }) // unmapped reads are not supported because clipping is driven by reference coordinates
  protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
    if (read.isEmpty()) {
      return read;
    }

    final boolean refStartUnset = refStart < 0;
    final boolean refStopUnset = refStop < 0;

    // Translate the reference coordinate into the read-coordinate range [start, stop] to clip.
    final int start;
    final int stop;
    if (refStartUnset) {
      if (refStopUnset) {
        throw new ReviewedStingException(
            "Only one of refStart or refStop must be < 0, not both ("
                + refStart
                + ", "
                + refStop
                + ")");
      }
      // Left tail: everything from the first base up to refStop.
      start = 0;
      stop =
          ReadUtils.getReadCoordinateForReferenceCoordinate(
              read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
    } else {
      if (!refStopUnset) {
        throw new ReviewedStingException(
            "Either refStart or refStop must be < 0 (" + refStart + ", " + refStop + ")");
      }
      // Right tail: everything from refStart up to the last base.
      start =
          ReadUtils.getReadCoordinateForReferenceCoordinate(
              read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
      stop = read.getReadLength() - 1;
    }

    final int lastIndex = read.getReadLength() - 1;

    if (start < 0 || stop > lastIndex) {
      throw new ReviewedStingException(
          "Trying to clip before the start or after the end of a read");
    }

    if (start > stop) {
      throw new ReviewedStingException(
          String.format(
              "START (%d) > (%d) STOP -- this should never happen, please check read: %s (CIGAR: %s)",
              start, stop, read, read.getCigarString()));
    }

    // A valid range must touch at least one end of the read.
    if (start > 0 && stop < lastIndex) {
      throw new ReviewedStingException(
          String.format(
              "Trying to clip the middle of the read: start %d, stop %d, cigar: %s",
              start, stop, read.getCigarString()));
    }

    addOp(new ClippingOp(start, stop));
    final GATKSAMRecord result = clipRead(ClippingRepresentation.HARDCLIP_BASES);
    this.ops = null; // discard the operation list once the clip has been applied
    return result;
  }
Example #3
0
 /**
  * Applies the single-read {@code hardClipToRegion(read, refStart, refStop)} to every read of a
  * list and collects the results that are not empty.
  *
  * <p>The input list is not modified; the relative order of the surviving reads is preserved.
  *
  * @param reads the reads to clip
  * @param refStart region start, passed through unchanged to the single-read overload
  * @param refStop region stop, passed through unchanged to the single-read overload
  * @return a new list holding the clipped reads for which {@code isEmpty()} is false
  */
 public static List<GATKSAMRecord> hardClipToRegion(
     final List<GATKSAMRecord> reads, final int refStart, final int refStop) {
   // At most one output per input, so the input size is an upper bound for the capacity.
   final List<GATKSAMRecord> survivors = new ArrayList<GATKSAMRecord>(reads.size());
   for (final GATKSAMRecord original : reads) {
     final GATKSAMRecord clipped = hardClipToRegion(original, refStart, refStop);
     if (clipped.isEmpty()) {
       continue; // nothing of this read is left after clipping
     }
     survivors.add(clipped);
   }
   return survivors;
 }
Example #4
0
  /**
   * Hard clips one tail of the read, addressed in read coordinates.
   *
   * <p>If the read is empty, or the requested range covers the whole read, an empty read is
   * returned instead of applying a clipping operation.
   *
   * @param start the first base to clip (inclusive)
   * @param stop the last base to clip (inclusive)
   * @return a new read, without the clipped bases
   */
  @Requires({
    "start >= 0 && stop <= read.getReadLength() - 1", // the range must lie within the read
    "start == 0 || stop == read.getReadLength() - 1"
  }) // the range must touch one end: the middle of a read cannot be clipped
  private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
    if (read.isEmpty()) {
      return GATKSAMRecord.emptyRead(read);
    }

    // Clipping [0, length - 1] removes every base, so short-circuit to an empty read.
    if (start == 0 && stop == read.getReadLength() - 1) {
      return GATKSAMRecord.emptyRead(read);
    }

    addOp(new ClippingOp(start, stop));
    return clipRead(ClippingRepresentation.HARDCLIP_BASES);
  }
Example #5
0
  /**
   * Hard clips insertions found at the beginning of the read. The end of the read is never
   * inspected.
   *
   * <p>The CIGAR is scanned from the left. Hard clips and soft clips are skipped, each insertion
   * queues a clipping operation over read positions {@code [0, length - 1]} of that insertion,
   * and the scan stops at the first element of any other kind.
   *
   * @return the read itself if it is empty, otherwise the result of applying the queued
   *     operations with {@code ClippingRepresentation.HARDCLIP_BASES}
   */
  private GATKSAMRecord hardClipLeadingInsertions() {
    if (read.isEmpty()) {
      return read;
    }

    for (final CigarElement element : read.getCigar().getCigarElements()) {
      final CigarOperator operator = element.getOperator();

      if (operator == CigarOperator.INSERTION) {
        // NOTE(review): the range always starts at read position 0, regardless of any soft
        // clips seen before this insertion -- confirm this is the intended range.
        addOp(new ClippingOp(0, element.getLength() - 1));
      } else if (operator != CigarOperator.HARD_CLIP && operator != CigarOperator.SOFT_CLIP) {
        break; // first element that is not H, S or I: the leading section is over
      }
    }

    return clipRead(ClippingRepresentation.HARDCLIP_BASES);
  }
Example #6
0
  /**
   * Hard clips away the outer soft-clipped bases of a read, keeping only the run of soft-clipped
   * bases with quality at or above {@code minQual} that is contiguous with the aligned bases.
   *
   * <p>On each tail the soft-clipped bases are scanned outwards, starting from the base adjacent
   * to the aligned part of the read, for as long as their quality is {@code >= minQual}. Every
   * base beyond the end of that run is hard clipped. The right tail is clipped before the left
   * tail so that the left-tail read coordinates stay valid.
   *
   * <p>NOTE(review): if the soft-clipped base adjacent to the aligned bases is itself below
   * {@code minQual}, no run is found and that tail is left untouched. Confirm this is intended.
   *
   * @param read the read
   * @param minQual the minimum base quality score for a soft-clipped base to be kept (inclusive)
   * @return an empty read if {@code read} is empty or has more leading soft clips than bases;
   *     otherwise the read with the low quality soft-clipped ends hard clipped (the input read
   *     itself when nothing had to be clipped)
   */
  public static GATKSAMRecord hardClipLowQualitySoftClips(GATKSAMRecord read, byte minQual) {
    final int nLeadingSoftClips = read.getAlignmentStart() - read.getSoftStart();
    if (read.isEmpty() || nLeadingSoftClips > read.getReadLength()) {
      return GATKSAMRecord.emptyRead(read);
    }

    final byte[] quals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);

    // Index of the outermost leading soft-clipped base to keep, or -1 if there is none.
    int left = -1;
    if (nLeadingSoftClips > 0) {
      for (int i = nLeadingSoftClips - 1; i >= 0; i--) {
        if (quals[i] >= minQual) {
          left = i;
        } else {
          break;
        }
      }
    }

    // Index of the outermost trailing soft-clipped base to keep, or -1 if there is none.
    int right = -1;
    final int nTailingSoftClips = read.getSoftEnd() - read.getAlignmentEnd();
    if (nTailingSoftClips > 0) {
      for (int i = read.getReadLength() - nTailingSoftClips; i < read.getReadLength(); i++) {
        if (quals[i] >= minQual) {
          right = i;
        } else {
          break;
        }
      }
    }

    GATKSAMRecord clippedRead = read;

    // Right tail first. Only clip if a kept run exists (right >= 0) and at least one base lies
    // beyond it (right + 1 < read length).
    if (right >= 0 && right + 1 < clippedRead.getReadLength()) {
      clippedRead =
          hardClipByReadCoordinates(clippedRead, right + 1, clippedRead.getReadLength() - 1);
    }

    // Then the left tail. The clipped range [0, left - 1] is non-empty exactly when left > 0.
    // The previous guard (left - 1 > 0) skipped the case left == 1, leaving a single low
    // quality soft-clipped base at position 0 unclipped.
    if (left > 0) {
      clippedRead = hardClipByReadCoordinates(clippedRead, 0, left - 1);
    }

    return clippedRead;
  }
Example #7
0
  /**
   * Hard clips both tails of the read by reference coordinates. The left tail runs from the
   * beginning of the read to the {@code left} coordinate (inclusive); the right tail runs from
   * the {@code right} coordinate (inclusive) to the end of the read.
   *
   * <p>The right tail is clipped first, then the left tail is clipped on the intermediate read
   * through a fresh {@code ReadClipper}. An empty read is returned when the read is empty, when
   * {@code left == right}, or when {@code left} lies beyond the alignment end of the intermediate
   * read.
   *
   * @param left the coordinate of the last base to be clipped in the left tail (inclusive)
   * @param right the coordinate of the first base to be clipped in the right tail (inclusive)
   * @return a new read, without the clipped bases
   */
  @Requires({
    "left <= right", // the two tails must not overlap
    "left >= read.getAlignmentStart()", // left must fall inside the mapped read
    "right <= read.getAlignmentEnd()"
  }) // right must fall inside the mapped read
  private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
    if (read.isEmpty()) {
      return GATKSAMRecord.emptyRead(read);
    }
    if (left == right) {
      return GATKSAMRecord.emptyRead(read);
    }

    // Step 1: remove the right tail (refStop = -1 selects right-tail clipping).
    final GATKSAMRecord rightClipped = hardClipByReferenceCoordinates(right, -1);

    // Clipping the right tail may also hard clip adjacent deletions, which can leave the left
    // cut coordinate outside of what remains of the read. In that case nothing would survive
    // the second clip, so the whole read is dropped.
    if (left > rightClipped.getAlignmentEnd()) {
      return GATKSAMRecord.emptyRead(read);
    }

    // Step 2: remove the left tail from the intermediate read.
    return new ReadClipper(rightClipped).hardClipByReferenceCoordinatesLeftTail(left);
  }
Example #8
0
  /**
   * Applies all queued clipping operations to the read using the given representation.
   *
   * <p>Operations are applied in queue order, each one to the result of the previous one. An
   * operation that starts past the end of the current (already clipped) read is skipped; one that
   * ends past it is truncated to the last base. Afterwards the queue is emptied and the clipper
   * is flagged as having clipped.
   *
   * @param algorithm the clipping representation to use for every queued operation
   * @return {@code getRead()} if no operation list exists; otherwise the clipped read, replaced
   *     by {@code GATKSAMRecord.emptyRead(...)} when clipping left it empty
   */
  public GATKSAMRecord clipRead(ClippingRepresentation algorithm) {
    if (ops == null) {
      return getRead();
    }

    GATKSAMRecord result = read;
    for (final ClippingOp pending : getOps()) {
      final int currentLength = result.getReadLength();

      // Earlier operations may have shortened the read below this operation's start.
      if (pending.start >= currentLength) {
        continue;
      }

      // Truncate the operation if it reaches beyond the last remaining base.
      final ClippingOp effective =
          pending.stop >= currentLength
              ? new ClippingOp(pending.start, currentLength - 1)
              : pending;

      result = effective.apply(algorithm, result);
    }

    wasClipped = true;
    ops.clear();

    return result.isEmpty() ? GATKSAMRecord.emptyRead(result) : result;
  }
Example #9
0
  /**
   * Clips any contiguous tail (left, right or both) whose base qualities are lower than or equal
   * to {@code lowQual}, using the requested clipping representation.
   *
   * <p>A low quality tail extends from one end of the read up to, but not including, the first
   * base with quality greater than {@code lowQual}. If the two tails together cover the whole
   * read, an empty read is returned.
   *
   * @param algorithm the clipping representation to apply to the low quality tails
   * @param lowQual bases in a tail with quality lower than or equal to this value are clipped
   * @return the read itself if it is empty, an empty read if every base is low quality, otherwise
   *     the result of {@code clipRead(algorithm)} with the low quality tails queued
   */
  private GATKSAMRecord clipLowQualEnds(ClippingRepresentation algorithm, byte lowQual) {
    if (read.isEmpty()) {
      return read;
    }

    final byte[] quals = read.getBaseQualities();
    final int readLength = read.getReadLength();

    // Walk inwards from the right end to the last base that survives.
    int lastKept = readLength - 1;
    while (lastKept >= 0 && quals[lastKept] <= lowQual) {
      lastKept--;
    }

    // Walk inwards from the left end to the first base that survives.
    int firstKept = 0;
    while (firstKept < readLength && quals[firstKept] <= lowQual) {
      firstKept++;
    }

    // The tails met or crossed: no base survives.
    if (firstKept > lastKept) {
      return GATKSAMRecord.emptyRead(read);
    }

    // Queue the right tail before the left tail.
    if (lastKept + 1 < readLength) {
      addOp(new ClippingOp(lastKept + 1, readLength - 1));
    }
    if (firstKept > 0) {
      addOp(new ClippingOp(0, firstKept - 1));
    }

    return clipRead(algorithm);
  }
Example #10
0
  /**
   * Turns the soft clipped bases of the read into matches.
   *
   * <p>Queues a single clipping operation and applies it with
   * {@code ClippingRepresentation.REVERT_SOFTCLIPPED_BASES}.
   *
   * @return the read itself if it is empty, otherwise a new read with every soft clip turned
   *     into a match
   */
  private GATKSAMRecord revertSoftClippedBases() {
    if (!read.isEmpty()) {
      // NOTE(review): the (0, 0) range looks like a placeholder that only triggers clipRead;
      // presumably the revert representation ignores the coordinates -- confirm.
      addOp(new ClippingOp(0, 0));
      return clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES);
    }
    return read;
  }