/**
 * Returns the base quality of {@code read} at the read coordinate that {@link ReadUtils} maps the
 * reference position {@code refLoc} to.
 *
 * <p>The read coordinate is obtained from {@code
 * ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead} with the {@code RIGHT_TAIL}
 * clipping tail, and is used directly as an index into the read's base-quality array.
 *
 * @param read the read whose base qualities are queried
 * @param refLoc the reference coordinate to look up
 * @return the base quality at the corresponding read offset, widened to a {@code Double}
 */
@Override
protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) {
  final byte[] baseQuals = read.getBaseQualities();
  final int readOffset =
      ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(
          read, refLoc, ReadUtils.ClippingTail.RIGHT_TAIL);
  final byte qualAtOffset = baseQuals[readOffset];
  return (double) qualAtOffset;
}
/** * Hard clip bases from read, from start to stop in base coordinates * * <p>If start == 0, then we will clip from the front of the read, otherwise we clip from the * right. If start == 0 and stop == 10, this would clip out the first 10 bases of the read. * * <p>Note that this function works with reads with negative alignment starts, in order to allow * us to hardClip reads that have had their soft clips reverted and so might have negative * alignment starts * * <p>Works properly with reduced reads and insertion/deletion base qualities * * @param read a non-null read * @param start a start >= 0 and < read.length * @param stop a stop >= 0 and < read.length. * @return a cloned version of read that has been properly trimmed down */ private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) { // If the read is unmapped there is no Cigar string and neither should we create a new cigar // string final CigarShift cigarShift = (read.getReadUnmappedFlag()) ? new CigarShift(new Cigar(), 0, 0) : hardClipCigar(read.getCigar(), start, stop); // the cigar may force a shift left or right (or both) in case we are left with insertions // starting or ending the read after applying the hard clip on start/stop. final int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd; final byte[] newBases = new byte[newLength]; final byte[] newQuals = new byte[newLength]; final int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart; System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength); System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength); final GATKSAMRecord hardClippedRead = (GATKSAMRecord) read.clone(); hardClippedRead .resetSoftStartAndEnd(); // reset the cached soft start and end because they may have // changed now that the read was hard clipped. No need to calculate // them now. 
They'll be lazily calculated on the next call to // getSoftStart()/End() hardClippedRead.setBaseQualities(newQuals); hardClippedRead.setReadBases(newBases); hardClippedRead.setCigar(cigarShift.cigar); if (start == 0) hardClippedRead.setAlignmentStart( read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar)); if (read.hasBaseIndelQualities()) { final byte[] newBaseInsertionQuals = new byte[newLength]; final byte[] newBaseDeletionQuals = new byte[newLength]; System.arraycopy( read.getBaseInsertionQualities(), copyStart, newBaseInsertionQuals, 0, newLength); System.arraycopy( read.getBaseDeletionQualities(), copyStart, newBaseDeletionQuals, 0, newLength); hardClippedRead.setBaseQualities(newBaseInsertionQuals, EventType.BASE_INSERTION); hardClippedRead.setBaseQualities(newBaseDeletionQuals, EventType.BASE_DELETION); } return hardClippedRead; }
/** * @param read a read containing the variant * @return number of hard clipped and low qual bases at the read end (where end is right end * w.r.t. the reference) */ public static int getNumClippedBasesAtEnd(final GATKSAMRecord read) { // check for hard clips (never consider these bases): final Cigar c = read.getCigar(); CigarElement last = c.getCigarElement(c.numCigarElements() - 1); int numEndClippedBases = 0; if (last.getOperator() == CigarOperator.H) { numEndClippedBases = last.getLength(); } final byte[] unclippedReadBases = read.getReadBases(); final byte[] unclippedReadQuals = read.getBaseQualities(); // Do a stricter base clipping than provided by CIGAR string, since this one may be too // conservative, // and may leave a string of Q2 bases still hanging off the reads. // TODO: this code may not even get used because HaplotypeCaller already hard clips low quality // tails for (int i = unclippedReadBases.length - numEndClippedBases - 1; i >= 0; i--) { if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD) numEndClippedBases++; else break; } return numEndClippedBases; }
/** * Clips the bases in read according to this operation's start and stop. Uses the clipping * representation used is the one provided by algorithm argument. * * @param algorithm clipping algorithm to use * @param originalRead the read to be clipped */ public GATKSAMRecord apply(ClippingRepresentation algorithm, GATKSAMRecord originalRead) { GATKSAMRecord read = (GATKSAMRecord) originalRead.clone(); byte[] quals = read.getBaseQualities(); byte[] bases = read.getReadBases(); byte[] newBases = new byte[bases.length]; byte[] newQuals = new byte[quals.length]; switch (algorithm) { // important note: // it's not safe to call read.getReadBases()[i] = 'N' or read.getBaseQualities()[i] = 0 // because you're not guaranteed to get a pointer to the actual array of bytes in the // GATKSAMRecord case WRITE_NS: for (int i = 0; i < bases.length; i++) { if (i >= start && i <= stop) { newBases[i] = 'N'; } else { newBases[i] = bases[i]; } } read.setReadBases(newBases); break; case WRITE_Q0S: for (int i = 0; i < quals.length; i++) { if (i >= start && i <= stop) { newQuals[i] = 0; } else { newQuals[i] = quals[i]; } } read.setBaseQualities(newQuals); break; case WRITE_NS_Q0S: for (int i = 0; i < bases.length; i++) { if (i >= start && i <= stop) { newQuals[i] = 0; newBases[i] = 'N'; } else { newQuals[i] = quals[i]; newBases[i] = bases[i]; } } read.setBaseQualities(newBases); read.setReadBases(newBases); break; case HARDCLIP_BASES: read = hardClip(read, start, stop); break; case SOFTCLIP_BASES: if (read.getReadUnmappedFlag()) { // we can't process unmapped reads throw new UserException("Read Clipper cannot soft clip unmapped reads"); } // System.out.printf("%d %d %d%n", stop, start, read.getReadLength()); int myStop = stop; if ((stop + 1 - start) == read.getReadLength()) { // BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it // alone // Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be // represented with 
SOFTCLIP_BASES, just leaving it alone", read.getReadName())); // break; myStop--; // just decrement stop } if (start > 0 && myStop != read.getReadLength() - 1) throw new RuntimeException( String.format( "Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d", read.getReadName(), start, myStop)); Cigar oldCigar = read.getCigar(); int scLeft = 0, scRight = read.getReadLength(); if (start == 0) scLeft = myStop + 1; else scRight = start; Cigar newCigar = softClip(oldCigar, scLeft, scRight); read.setCigar(newCigar); int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar); int newStart = read.getAlignmentStart() + newClippedStart; read.setAlignmentStart(newStart); break; case REVERT_SOFTCLIPPED_BASES: read = revertSoftClippedBases(read); break; default: throw new IllegalStateException("Unexpected Clipping operator type " + algorithm); } return read; }