private GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) { GATKSAMRecord unclipped = (GATKSAMRecord) read.clone(); Cigar unclippedCigar = new Cigar(); int matchesCount = 0; for (CigarElement element : read.getCigar().getCigarElements()) { if (element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.MATCH_OR_MISMATCH) matchesCount += element.getLength(); else if (matchesCount > 0) { unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH)); matchesCount = 0; unclippedCigar.add(element); } else unclippedCigar.add(element); } if (matchesCount > 0) unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH)); unclipped.setCigar(unclippedCigar); final int newStart = read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar); unclipped.setAlignmentStart(newStart); if (newStart <= 0) { // if the start of the unclipped read occurs before the contig, // we must hard clip away the bases since we cannot represent reads with // negative or 0 alignment start values in the SAMRecord (e.g., 0 means unaligned) return hardClip(unclipped, 0, -newStart); } else { return unclipped; } }
/** * Checks if a hard clipped cigar left a read starting or ending with deletions or gap (N) and * cleans it up accordingly. * * @param cigar the original cigar * @return an object with the shifts (see CigarShift class) */ private CigarShift cleanHardClippedCigar(final Cigar cigar) { final Cigar cleanCigar = new Cigar(); int shiftFromStart = 0; int shiftFromEnd = 0; Stack<CigarElement> cigarStack = new Stack<CigarElement>(); final Stack<CigarElement> inverseCigarStack = new Stack<CigarElement>(); for (final CigarElement cigarElement : cigar.getCigarElements()) cigarStack.push(cigarElement); for (int i = 1; i <= 2; i++) { int shift = 0; int totalHardClip = 0; boolean readHasStarted = false; boolean addedHardClips = false; while (!cigarStack.empty()) { CigarElement cigarElement = cigarStack.pop(); if (!readHasStarted && cigarElement.getOperator() != CigarOperator.DELETION && cigarElement.getOperator() != CigarOperator.SKIPPED_REGION && cigarElement.getOperator() != CigarOperator.HARD_CLIP) readHasStarted = true; else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.HARD_CLIP) totalHardClip += cigarElement.getLength(); else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.DELETION) totalHardClip += cigarElement.getLength(); else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.SKIPPED_REGION) totalHardClip += cigarElement.getLength(); if (readHasStarted) { if (i == 1) { if (!addedHardClips) { if (totalHardClip > 0) inverseCigarStack.push(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP)); addedHardClips = true; } inverseCigarStack.push(cigarElement); } else { if (!addedHardClips) { if (totalHardClip > 0) cleanCigar.add(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP)); addedHardClips = true; } cleanCigar.add(cigarElement); } } } // first pass (i=1) is from end to start of the cigar elements if (i == 1) { shiftFromEnd = shift; cigarStack = inverseCigarStack; } // second pass (i=2) is from start to end with the end already cleaned else { shiftFromStart = shift; } } return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd); }
@Requires({"!cigar.isEmpty()"}) private CigarShift hardClipCigar(Cigar cigar, int start, int stop) { Cigar newCigar = new Cigar(); int index = 0; int totalHardClipCount = stop - start + 1; int alignmentShift = 0; // caused by hard clipping deletions // hard clip the beginning of the cigar string if (start == 0) { Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator(); CigarElement cigarElement = cigarElementIterator.next(); // Skip all leading hard clips while (cigarElement.getOperator() == CigarOperator.HARD_CLIP) { totalHardClipCount += cigarElement.getLength(); if (cigarElementIterator.hasNext()) cigarElement = cigarElementIterator.next(); else throw new ReviewedGATKException( "Read is entirely hardclipped, shouldn't be trying to clip it's cigar string"); } // keep clipping until we hit stop while (index <= stop) { int shift = 0; if (cigarElement.getOperator().consumesReadBases()) shift = cigarElement.getLength(); // we're still clipping or just finished perfectly if (index + shift == stop + 1) { alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength()); newCigar.add( new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP)); } // element goes beyond what we need to clip else if (index + shift > stop + 1) { int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1); alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop - index + 1); newCigar.add( new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP)); newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator())); } index += shift; alignmentShift += calculateHardClippingAlignmentShift(cigarElement, shift); if (index <= stop && cigarElementIterator.hasNext()) cigarElement = cigarElementIterator.next(); else break; } // add the remaining cigar elements while (cigarElementIterator.hasNext()) { cigarElement = cigarElementIterator.next(); newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator())); } } // hard clip the end of the cigar string else { Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator(); CigarElement cigarElement = cigarElementIterator.next(); // Keep marching on until we find the start while (index < start) { int shift = 0; if (cigarElement.getOperator().consumesReadBases()) shift = cigarElement.getLength(); // we haven't gotten to the start yet, keep everything as is. if (index + shift < start) newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator())); // element goes beyond our clip starting position else { int elementLengthAfterChopping = start - index; alignmentShift += calculateHardClippingAlignmentShift( cigarElement, cigarElement.getLength() - (start - index)); // if this last element is a HARD CLIP operator, just merge it with our hard clip operator // to be added later if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) totalHardClipCount += elementLengthAfterChopping; // otherwise, maintain what's left of this last operator else newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator())); } index += shift; if (index < start && cigarElementIterator.hasNext()) cigarElement = cigarElementIterator.next(); else break; } // check if we are hard clipping indels while (cigarElementIterator.hasNext()) { cigarElement = cigarElementIterator.next(); alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength()); // if the read had a HardClip operator in the end, combine it with the Hard Clip we are // adding if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) totalHardClipCount += cigarElement.getLength(); } newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP)); } return cleanHardClippedCigar(newCigar); }
/** Given a cigar string, soft clip up to startClipEnd and soft clip starting at endClipBegin */ private Cigar softClip(final Cigar __cigar, final int __startClipEnd, final int __endClipBegin) { if (__endClipBegin <= __startClipEnd) { // whole thing should be soft clipped int cigarLength = 0; for (CigarElement e : __cigar.getCigarElements()) { cigarLength += e.getLength(); } Cigar newCigar = new Cigar(); newCigar.add(new CigarElement(cigarLength, CigarOperator.SOFT_CLIP)); assert newCigar.isValid(null, -1) == null; return newCigar; } int curLength = 0; Vector<CigarElement> newElements = new Vector<CigarElement>(); for (CigarElement curElem : __cigar.getCigarElements()) { if (!curElem.getOperator().consumesReadBases()) { if (curElem.getOperator() == CigarOperator.HARD_CLIP || curLength > __startClipEnd && curLength < __endClipBegin) { newElements.add(new CigarElement(curElem.getLength(), curElem.getOperator())); } continue; } int s = curLength; int e = curLength + curElem.getLength(); if (e <= __startClipEnd || s >= __endClipBegin) { // must turn this entire thing into a clip newElements.add(new CigarElement(curElem.getLength(), CigarOperator.SOFT_CLIP)); } else if (s >= __startClipEnd && e <= __endClipBegin) { // same thing newElements.add(new CigarElement(curElem.getLength(), curElem.getOperator())); } else { // we are clipping in the middle of this guy CigarElement newStart = null; CigarElement newMid = null; CigarElement newEnd = null; int midLength = curElem.getLength(); if (s < __startClipEnd) { newStart = new CigarElement(__startClipEnd - s, CigarOperator.SOFT_CLIP); midLength -= newStart.getLength(); } if (e > __endClipBegin) { newEnd = new CigarElement(e - __endClipBegin, CigarOperator.SOFT_CLIP); midLength -= newEnd.getLength(); } assert midLength >= 0; if (midLength > 0) { newMid = new CigarElement(midLength, curElem.getOperator()); } if (newStart != null) { newElements.add(newStart); } if (newMid != null) { newElements.add(newMid); } if (newEnd != null) { newElements.add(newEnd); } } curLength += curElem.getLength(); } Vector<CigarElement> finalNewElements = new Vector<CigarElement>(); CigarElement lastElement = null; for (CigarElement elem : newElements) { if (lastElement == null || lastElement.getOperator() != elem.getOperator()) { if (lastElement != null) { finalNewElements.add(lastElement); } lastElement = elem; } else { lastElement = new CigarElement(lastElement.getLength() + elem.getLength(), lastElement.getOperator()); } } if (lastElement != null) { finalNewElements.add(lastElement); } Cigar newCigar = new Cigar(finalNewElements); assert newCigar.isValid(null, -1) == null; return newCigar; }