public static int getReadCoordinateForReferenceCoordinate( final int alignmentStart, final Cigar cigar, final int refCoord, final ClippingTail tail, final boolean allowGoalNotReached) { final Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate( alignmentStart, cigar, refCoord, allowGoalNotReached); int readCoord = result.getLeft(); // Corner case one: clipping the right tail and falls on deletion, move to the next // read coordinate. It is not a problem for the left tail because the default answer // from getReadCoordinateForReferenceCoordinate is to give the previous read coordinate. if (result.getRight() && tail == ClippingTail.RIGHT_TAIL) { readCoord++; } // clipping the left tail and first base is insertion, go to the next read coordinate // with the same reference coordinate. Advance to the next cigar element, or to the // end of the read if there is no next element. final CigarElement firstElementIsInsertion = readStartsWithInsertion(cigar); if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion != null) { readCoord = Math.min(firstElementIsInsertion.getLength(), cigar.getReadLength() - 1); } return readCoord; }
/** * Calculates the reference coordinate for the end of the read taking into account soft clips but * not hard clips. * * <p>Note: getUnclippedEnd() adds soft and hard clips, this function only adds soft clips. * * @param read the read * @param cigar the read's cigar * <p>Note: this overload of the function takes the cigar as input for speed because getCigar * is an expensive operation. Most callers should use the overload that does not take the * cigar. * @return the unclipped end of the read taking soft clips (but not hard clips) into account */ public static int getSoftEnd(final GATKRead read, final Cigar cigar) { Utils.nonNull(read, "read"); Utils.nonNull(cigar, "cigar"); boolean foundAlignedBase = false; int softEnd = read.getEnd(); final List<CigarElement> cigs = cigar.getCigarElements(); for (int i = cigs.size() - 1; i >= 0; --i) { final CigarElement cig = cigs.get(i); final CigarOperator op = cig.getOperator(); if (op == CigarOperator .SOFT_CLIP) { // assumes the soft clip that we found is at the end of the aligned read softEnd += cig.getLength(); } else if (op != CigarOperator.HARD_CLIP) { foundAlignedBase = true; break; } } if (!foundAlignedBase) { // for example 64H14S, the soft end is actually the same as the // alignment end softEnd = read.getEnd(); } return softEnd; }
/**
 * Checks whether the first cigar element after any leading hard or soft clips is an insertion.
 *
 * @param cigar the cigar to inspect
 * @return a pair of ({@code true}, the insertion element) when the read starts with an
 *     insertion, otherwise ({@code false}, {@code null})
 */
public static Pair<Boolean, CigarElement> readStartsWithInsertion(final Cigar cigar) {
  for (final CigarElement element : cigar.getCigarElements()) {
    final CigarOperator operator = element.getOperator();
    if (operator == CigarOperator.INSERTION) {
      return new Pair<Boolean, CigarElement>(true, element);
    }

    // Leading clips are skipped; any other operator means the read does not start with an
    // insertion.
    final boolean isClip =
        operator == CigarOperator.HARD_CLIP || operator == CigarOperator.SOFT_CLIP;
    if (!isClip) {
      break;
    }
  }
  return new Pair<Boolean, CigarElement>(false, null);
}
/**
 * Tells whether the current position is the first base of an operator that directly follows an
 * element with operator {@code op}.
 *
 * @param cigar the cigar being walked
 * @param currentOperatorIndex index of the current element within {@code cigar}
 * @param op the operator the preceding element must have
 * @param isFirstOp whether the current element is the first one; if so there is no preceding
 *     element and the result is {@code false}
 * @param isFirstBaseOfOp whether the current position is the first base of the current element
 * @return {@code true} only if this is not the first operator, the position is the first base
 *     of the current operator, and the preceding element's operator is {@code op}
 */
private static boolean isAfterOp(
    final Cigar cigar,
    final int currentOperatorIndex,
    final CigarOperator op,
    final boolean isFirstOp,
    final boolean isFirstBaseOfOp) {
  // The preceding element is only looked up when both flags allow it, so index -1 is never read.
  if (isFirstOp || !isFirstBaseOfOp) {
    return false;
  }
  final CigarOperator previousOperator =
      cigar.getCigarElement(currentOperatorIndex - 1).getOperator();
  return previousOperator == op;
}
/**
 * Determines whether a read begins with an insertion.
 *
 * <p>Elements are examined in order. Hard clips are always skipped, and soft clips are skipped
 * only when {@code ignoreSoftClipOps} is {@code true}. The first element that is not skipped
 * decides the answer.
 *
 * @param cigarForRead the CIGAR to evaluate
 * @param ignoreSoftClipOps whether S operators should be ignored when deciding if an I operator
 *     is at the beginning. H operators are always ignored.
 * @return the insertion element if the read starts with one, or {@code null} otherwise
 */
public static CigarElement readStartsWithInsertion(
    final Cigar cigarForRead, final boolean ignoreSoftClipOps) {
  for (final CigarElement element : cigarForRead.getCigarElements()) {
    final CigarOperator operator = element.getOperator();
    if (operator == CigarOperator.INSERTION) {
      return element;
    }

    final boolean skippable =
        operator == CigarOperator.HARD_CLIP
            || (ignoreSoftClipOps && operator == CigarOperator.SOFT_CLIP);
    if (!skippable) {
      // Something other than an insertion comes first.
      return null;
    }
  }
  return null;
}
/**
 * Computes the reference coordinate of the beginning of the read, extended by leading soft clips
 * but not by hard clips.
 *
 * <p>Note: getUnclippedStart() adds both soft and hard clips; this method adds soft clips only.
 *
 * <p>The cigar is scanned from its first element. Soft-clip lengths are summed and hard clips
 * are skipped until the first element of any other kind is met.
 *
 * @param read the read; must not be null
 * @param cigar the read's cigar; must not be null. It is passed in explicitly for speed because
 *     getCigar is an expensive operation. Most callers should use the overload that does not
 *     take the cigar.
 * @return the start of the read taking soft clips (but not hard clips) into account
 */
public static int getSoftStart(final GATKRead read, final Cigar cigar) {
  Utils.nonNull(read, "read");
  Utils.nonNull(cigar, "cigar");

  final int alignmentStart = read.getStart();

  int leadingSoftClippedBases = 0;
  for (final CigarElement element : cigar.getCigarElements()) {
    final CigarOperator operator = element.getOperator();
    if (operator == CigarOperator.HARD_CLIP) {
      continue;
    }
    if (operator != CigarOperator.SOFT_CLIP) {
      break;
    }
    leadingSoftClippedBases += element.getLength();
  }
  return alignmentStart - leadingSoftClippedBases;
}
private static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate( final int alignmentStart, final Cigar cigar, final int refCoord, final boolean allowGoalNotReached) { int readBases = 0; int refBases = 0; boolean fallsInsideDeletionOrSkippedRegion = false; boolean endJustBeforeDeletionOrSkippedRegion = false; boolean fallsInsideOrJustBeforeDeletionOrSkippedRegion = false; final int goal = refCoord - alignmentStart; // The goal is to move this many reference bases if (goal < 0) { if (allowGoalNotReached) { return new MutablePair<>(CLIPPING_GOAL_NOT_REACHED, false); } else { throw new GATKException( "Somehow the requested coordinate is not covered by the read. Too many deletions?"); } } boolean goalReached = refBases == goal; final Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator(); while (!goalReached && cigarElementIterator.hasNext()) { final CigarElement cigarElement = cigarElementIterator.next(); int shift = 0; if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) { if (refBases + cigarElement.getLength() < goal) { shift = cigarElement.getLength(); } else { shift = goal - refBases; } refBases += shift; } goalReached = refBases == goal; if (!goalReached && cigarElement.getOperator().consumesReadBases()) { readBases += cigarElement.getLength(); } if (goalReached) { // Is this base's reference position within this cigar element? Or did we use it all? final boolean endsWithinCigar = shift < cigarElement.getLength(); // If it isn't, we need to check the next one. There should *ALWAYS* be a next one // since we checked if the goal coordinate is within the read length, so this is just a // sanity check. if (!endsWithinCigar && !cigarElementIterator.hasNext()) { if (allowGoalNotReached) { return new MutablePair<>(CLIPPING_GOAL_NOT_REACHED, false); } else { throw new GATKException( String.format( "Reference coordinate corresponds to a non-existent base in the read. 
This should never happen -- check read with alignment start: %s and cigar: %s", alignmentStart, cigar)); } } CigarElement nextCigarElement = null; // if we end inside the current cigar element, we just have to check if it is a deletion (or // skipped region) if (endsWithinCigar) { fallsInsideDeletionOrSkippedRegion = (cigarElement.getOperator() == CigarOperator.DELETION || cigarElement.getOperator() == CigarOperator.SKIPPED_REGION); } // if we end outside the current cigar element, we need to check if the next element is an // insertion, deletion or skipped region. else { nextCigarElement = cigarElementIterator.next(); // if it's an insertion, we need to clip the whole insertion before looking at the next // element if (nextCigarElement.getOperator() == CigarOperator.INSERTION) { readBases += nextCigarElement.getLength(); if (!cigarElementIterator.hasNext()) { if (allowGoalNotReached) { return new MutablePair<>(CLIPPING_GOAL_NOT_REACHED, false); } else { throw new GATKException( String.format( "Reference coordinate corresponds to a non-existent base in the read. This should never happen -- check read with alignment start: %s and cigar: %s", alignmentStart, cigar)); } } nextCigarElement = cigarElementIterator.next(); } // if it's a deletion (or skipped region), we will pass the information on to be handled // downstream. 
endJustBeforeDeletionOrSkippedRegion = (nextCigarElement.getOperator() == CigarOperator.DELETION || nextCigarElement.getOperator() == CigarOperator.SKIPPED_REGION); } fallsInsideOrJustBeforeDeletionOrSkippedRegion = endJustBeforeDeletionOrSkippedRegion || fallsInsideDeletionOrSkippedRegion; // If we reached our goal outside a deletion (or skipped region), add the shift if (!fallsInsideOrJustBeforeDeletionOrSkippedRegion && cigarElement.getOperator().consumesReadBases()) { readBases += shift; } // If we reached our goal just before a deletion (or skipped region) we need // to add the shift of the current cigar element but go back to it's last element to return // the last // base before the deletion (or skipped region) (see warning in function contracts) else if (endJustBeforeDeletionOrSkippedRegion && cigarElement.getOperator().consumesReadBases()) { readBases += shift - 1; } // If we reached our goal inside a deletion (or skipped region), or just between a // deletion and a skipped region, // then we must backtrack to the last base before the deletion (or skipped region) else if (fallsInsideDeletionOrSkippedRegion || (endJustBeforeDeletionOrSkippedRegion && nextCigarElement.getOperator().equals(CigarOperator.N)) || (endJustBeforeDeletionOrSkippedRegion && nextCigarElement.getOperator().equals(CigarOperator.D))) { readBases--; } } } if (!goalReached) { if (allowGoalNotReached) { return new MutablePair<>(CLIPPING_GOAL_NOT_REACHED, false); } else { throw new GATKException( "Somehow the requested coordinate is not covered by the read. Alignment " + alignmentStart + " | " + cigar); } } return Pair.of(readBases, fallsInsideOrJustBeforeDeletionOrSkippedRegion); }
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate( final int alignmentStart, final Cigar cigar, final int refCoord, final boolean allowGoalNotReached) { int readBases = 0; int refBases = 0; boolean fallsInsideDeletion = false; int goal = refCoord - alignmentStart; // The goal is to move this many reference bases if (goal < 0) { if (allowGoalNotReached) { return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false); } else { throw new ReviewedStingException( "Somehow the requested coordinate is not covered by the read. Too many deletions?"); } } boolean goalReached = refBases == goal; Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator(); while (!goalReached && cigarElementIterator.hasNext()) { CigarElement cigarElement = cigarElementIterator.next(); int shift = 0; if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) { if (refBases + cigarElement.getLength() < goal) shift = cigarElement.getLength(); else shift = goal - refBases; refBases += shift; } goalReached = refBases == goal; if (!goalReached && cigarElement.getOperator().consumesReadBases()) readBases += cigarElement.getLength(); if (goalReached) { // Is this base's reference position within this cigar element? Or did we use it all? boolean endsWithinCigar = shift < cigarElement.getLength(); // If it isn't, we need to check the next one. There should *ALWAYS* be a next one // since we checked if the goal coordinate is within the read length, so this is just a // sanity check. if (!endsWithinCigar && !cigarElementIterator.hasNext()) { if (allowGoalNotReached) { return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false); } else { throw new ReviewedStingException( "Reference coordinate corresponds to a non-existent base in the read. 
This should never happen -- call Mauricio"); } } CigarElement nextCigarElement; // if we end inside the current cigar element, we just have to check if it is a deletion if (endsWithinCigar) fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION; // if we end outside the current cigar element, we need to check if the next element is an // insertion or deletion. else { nextCigarElement = cigarElementIterator.next(); // if it's an insertion, we need to clip the whole insertion before looking at the next // element if (nextCigarElement.getOperator() == CigarOperator.INSERTION) { readBases += nextCigarElement.getLength(); if (!cigarElementIterator.hasNext()) { if (allowGoalNotReached) { return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false); } else { throw new ReviewedStingException( "Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio"); } } nextCigarElement = cigarElementIterator.next(); } // if it's a deletion, we will pass the information on to be handled downstream. 
fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION; } // If we reached our goal outside a deletion, add the shift if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases()) readBases += shift; // If we reached our goal inside a deletion, but the deletion is the next cigar element then // we need // to add the shift of the current cigar element but go back to it's last element to return // the last // base before the deletion (see warning in function contracts) else if (fallsInsideDeletion && !endsWithinCigar) readBases += shift - 1; // If we reached our goal inside a deletion then we must backtrack to the last base before // the deletion else if (fallsInsideDeletion && endsWithinCigar) readBases--; } } if (!goalReached) { if (allowGoalNotReached) { return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false); } else { throw new ReviewedStingException( "Somehow the requested coordinate is not covered by the read. Alignment " + alignmentStart + " | " + cigar); } } return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion); }