/** * Will hard clip every soft clipped bases in the read. * * @return a new read without the soft clipped bases */ private GATKSAMRecord hardClipSoftClippedBases() { if (read.isEmpty()) return read; int readIndex = 0; int cutLeft = -1; // first position to hard clip (inclusive) int cutRight = -1; // first position to hard clip (inclusive) boolean rightTail = false; // trigger to stop clipping the left tail and start cutting the right tail for (CigarElement cigarElement : read.getCigar().getCigarElements()) { if (cigarElement.getOperator() == CigarOperator.SOFT_CLIP) { if (rightTail) { cutRight = readIndex; } else { cutLeft = readIndex + cigarElement.getLength() - 1; } } else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP) rightTail = true; if (cigarElement.getOperator().consumesReadBases()) readIndex += cigarElement.getLength(); } // It is extremely important that we cut the end first otherwise the read coordinates change. if (cutRight >= 0) this.addOp(new ClippingOp(cutRight, read.getReadLength() - 1)); if (cutLeft >= 0) this.addOp(new ClippingOp(0, cutLeft)); return clipRead(ClippingRepresentation.HARDCLIP_BASES); }
/** * Generic functionality to hard clip a read, used internally by * hardClipByReferenceCoordinatesLeftTail and hardClipByReferenceCoordinatesRightTail. Should not * be used directly. * * <p>Note, it REQUIRES you to give the directionality of your hard clip (i.e. whether you're * clipping the left of right tail) by specifying either refStart < 0 or refStop < 0. * * @param refStart first base to clip (inclusive) * @param refStop last base to clip (inclusive) * @return a new read, without the clipped bases */ @Requires({ "!read.getReadUnmappedFlag()", "refStart < 0 || refStop < 0" }) // can't handle unmapped reads, as we're using reference coordinates to clip protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { if (read.isEmpty()) return read; int start; int stop; // Determine the read coordinate to start and stop hard clipping if (refStart < 0) { if (refStop < 0) throw new ReviewedStingException( "Only one of refStart or refStop must be < 0, not both (" + refStart + ", " + refStop + ")"); start = 0; stop = ReadUtils.getReadCoordinateForReferenceCoordinate( read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); } else { if (refStop >= 0) throw new ReviewedStingException( "Either refStart or refStop must be < 0 (" + refStart + ", " + refStop + ")"); start = ReadUtils.getReadCoordinateForReferenceCoordinate( read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); stop = read.getReadLength() - 1; } if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException( "Trying to clip before the start or after the end of a read"); if (start > stop) throw new ReviewedStingException( String.format( "START (%d) > (%d) STOP -- this should never happen, please check read: %s (CIGAR: %s)", start, stop, read, read.getCigarString())); if (start > 0 && stop < read.getReadLength() - 1) throw new ReviewedStingException( String.format( "Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString())); 
this.addOp(new ClippingOp(start, stop)); GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); this.ops = null; return clippedRead; }
/**
 * Applies the single-read {@code hardClipToRegion} to every read in the list and collects the
 * results that are not empty, preserving the input order.
 *
 * @param reads the reads to clip
 * @param refStart region start, passed through unchanged to the single-read overload
 * @param refStop region stop, passed through unchanged to the single-read overload
 * @return a new list holding the clipped reads that are non-empty after clipping
 */
public static List<GATKSAMRecord> hardClipToRegion(
    final List<GATKSAMRecord> reads, final int refStart, final int refStop) {
  final List<GATKSAMRecord> survivors = new ArrayList<GATKSAMRecord>(reads.size());
  for (final GATKSAMRecord candidate : reads) {
    final GATKSAMRecord trimmed = hardClipToRegion(candidate, refStart, refStop);
    if (trimmed.isEmpty()) {
      continue; // nothing left of this read after clipping
    }
    survivors.add(trimmed);
  }
  return survivors;
}
/** * Hard clips a read using read coordinates. * * @param start the first base to clip (inclusive) * @param stop the last base to clip (inclusive) * @return a new read, without the clipped bases */ @Requires({ "start >= 0 && stop <= read.getReadLength() - 1", // start and stop have to be within the read "start == 0 || stop == read.getReadLength() - 1" }) // cannot clip the middle of the read private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) return GATKSAMRecord.emptyRead(read); this.addOp(new ClippingOp(start, stop)); return clipRead(ClippingRepresentation.HARDCLIP_BASES); }
/**
 * Hard clips insertions found at the beginning of the read. Only the leading CIGAR elements are
 * inspected; the end of the read is not examined.
 *
 * <p>The scan stops at the first element that is not a hard clip, a soft clip or an insertion.
 * Each insertion met before that point queues a clip of read positions {@code 0} through
 * {@code length - 1}.
 *
 * @return the read itself when it is empty, otherwise the result of applying the queued
 *     operations with {@code HARDCLIP_BASES}
 */
private GATKSAMRecord hardClipLeadingInsertions() {
  if (read.isEmpty()) {
    return read;
  }

  for (final CigarElement element : read.getCigar().getCigarElements()) {
    final CigarOperator operator = element.getOperator();
    if (operator == CigarOperator.INSERTION) {
      this.addOp(new ClippingOp(0, element.getLength() - 1));
    } else if (operator != CigarOperator.HARD_CLIP && operator != CigarOperator.SOFT_CLIP) {
      break; // reached the first element that is neither a clip nor an insertion
    }
  }

  return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
/** * Hard clips away soft clipped bases that are below the given quality threshold * * @param read the read * @param minQual the mininum base quality score to revert the base (inclusive) * @return a new read without low quality soft clipped bases */ public static GATKSAMRecord hardClipLowQualitySoftClips(GATKSAMRecord read, byte minQual) { int nLeadingSoftClips = read.getAlignmentStart() - read.getSoftStart(); if (read.isEmpty() || nLeadingSoftClips > read.getReadLength()) return GATKSAMRecord.emptyRead(read); byte[] quals = read.getBaseQualities(EventType.BASE_SUBSTITUTION); int left = -1; if (nLeadingSoftClips > 0) { for (int i = nLeadingSoftClips - 1; i >= 0; i--) { if (quals[i] >= minQual) left = i; else break; } } int right = -1; int nTailingSoftClips = read.getSoftEnd() - read.getAlignmentEnd(); if (nTailingSoftClips > 0) { for (int i = read.getReadLength() - nTailingSoftClips; i < read.getReadLength(); i++) { if (quals[i] >= minQual) right = i; else break; } } GATKSAMRecord clippedRead = read; if (right >= 0 && right + 1 < clippedRead .getReadLength()) // only clip if there are softclipped bases (right >= 0) and the // first high quality soft clip is not the last base (right+1 < // readlength) clippedRead = hardClipByReadCoordinates( clippedRead, right + 1, clippedRead.getReadLength() - 1); // first we hard clip the low quality soft clips on the right tail if (left >= 0 && left - 1 > 0) // only clip if there are softclipped bases (left >= 0) and the first high quality // soft clip is not the last base (left-1 > 0) clippedRead = hardClipByReadCoordinates( clippedRead, 0, left - 1); // then we hard clip the low quality soft clips on the left tail return clippedRead; }
/** * Hard clips both tails of a read. Left tail goes from the beginning to the 'left' coordinate * (inclusive) Right tail goes from the 'right' coordinate (inclusive) until the end of the read * * @param left the coordinate of the last base to be clipped in the left tail (inclusive) * @param right the coordinate of the first base to be clipped in the right tail (inclusive) * @return a new read, without the clipped bases */ @Requires({ "left <= right", // tails cannot overlap "left >= read.getAlignmentStart()", // coordinate has to be within the mapped read "right <= read.getAlignmentEnd()" }) // coordinate has to be within the mapped read private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { if (read.isEmpty() || left == right) return GATKSAMRecord.emptyRead(read); GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); // after clipping one tail, it is possible that the consequent hard clipping of adjacent // deletions // make the left cut index no longer part of the read. In that case, clip the read entirely. if (left > leftTailRead.getAlignmentEnd()) return GATKSAMRecord.emptyRead(read); ReadClipper clipper = new ReadClipper(leftTailRead); return clipper.hardClipByReferenceCoordinatesLeftTail(left); }
/** * Clips a read according to ops and the chosen algorithm. * * @param algorithm What mode of clipping do you want to apply for the stacked operations. * @return the read with the clipping applied. */ public GATKSAMRecord clipRead(ClippingRepresentation algorithm) { if (ops == null) return getRead(); GATKSAMRecord clippedRead = read; for (ClippingOp op : getOps()) { final int readLength = clippedRead.getReadLength(); // check if the clipped read can still be clipped in the range requested if (op.start < readLength) { ClippingOp fixedOperation = op; if (op.stop >= readLength) fixedOperation = new ClippingOp(op.start, readLength - 1); clippedRead = fixedOperation.apply(algorithm, clippedRead); } } wasClipped = true; ops.clear(); if (clippedRead.isEmpty()) return GATKSAMRecord.emptyRead(clippedRead); return clippedRead; }
/** * Clips any contiguous tail (left, right or both) with base quality lower than lowQual using the * desired algorithm. * * <p>This function will look for low quality tails and hard clip them away. A low quality tail * ends when a base has base quality greater than lowQual. * * @param algorithm the algorithm to use (HardClip, SoftClip, Write N's,...) * @param lowQual every base quality lower than or equal to this in the tail of the read will be * hard clipped * @return a new read without low quality tails */ private GATKSAMRecord clipLowQualEnds(ClippingRepresentation algorithm, byte lowQual) { if (read.isEmpty()) return read; final byte[] quals = read.getBaseQualities(); final int readLength = read.getReadLength(); int leftClipIndex = 0; int rightClipIndex = readLength - 1; // check how far we can clip both sides while (rightClipIndex >= 0 && quals[rightClipIndex] <= lowQual) rightClipIndex--; while (leftClipIndex < readLength && quals[leftClipIndex] <= lowQual) leftClipIndex++; // if the entire read should be clipped, then return an empty read. if (leftClipIndex > rightClipIndex) return GATKSAMRecord.emptyRead(read); if (rightClipIndex < readLength - 1) { this.addOp(new ClippingOp(rightClipIndex + 1, readLength - 1)); } if (leftClipIndex > 0) { this.addOp(new ClippingOp(0, leftClipIndex - 1)); } return this.clipRead(algorithm); }
/**
 * Converts the soft-clipped bases of the read into matches.
 *
 * @return the read itself when it is empty, otherwise the result of applying
 *     {@code REVERT_SOFTCLIPPED_BASES}
 */
private GATKSAMRecord revertSoftClippedBases() {
  if (read.isEmpty()) {
    return read;
  }

  // A single operation is queued so clipRead has something to apply.
  // NOTE(review): presumably the (0, 0) range is ignored by REVERT_SOFTCLIPPED_BASES — confirm
  // in ClippingOp.
  final ClippingOp revertOp = new ClippingOp(0, 0);
  this.addOp(revertOp);
  return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES);
}