예제 #1
0
파일: ReadUtils.java 프로젝트: AriesLL/gatk
  /**
   * Resolves a reference coordinate to a read coordinate, adjusting the raw answer for the tail
   * that is being clipped.
   *
   * <p>The raw lookup is delegated to the four-argument overload; its boolean flag is then used to
   * nudge the coordinate when clipping the right tail, and a leading insertion is stepped over
   * when clipping the left tail.
   *
   * @param alignmentStart the start coordinate the cigar walk is measured from
   * @param cigar the read's cigar
   * @param refCoord the requested reference coordinate
   * @param tail which tail of the read is being clipped
   * @param allowGoalNotReached forwarded unchanged to the four-argument overload
   * @return the (possibly adjusted) read coordinate produced by the four-argument overload
   */
  public static int getReadCoordinateForReferenceCoordinate(
      final int alignmentStart,
      final Cigar cigar,
      final int refCoord,
      final ClippingTail tail,
      final boolean allowGoalNotReached) {
    final Pair<Integer, Boolean> lookup =
        getReadCoordinateForReferenceCoordinate(
            alignmentStart, cigar, refCoord, allowGoalNotReached);
    int coordinate = lookup.getLeft();
    final boolean flaggedByLookup = lookup.getRight();

    // Right-tail corner case: when the lookup raises its flag the default answer is the previous
    // read coordinate, so step forward by one. The left tail already wants that default.
    final boolean clippingRightTail = tail == ClippingTail.RIGHT_TAIL;
    if (flaggedByLookup && clippingRightTail) {
      coordinate += 1;
    }

    // Left-tail corner case: the read opens with an insertion, which shares the reference
    // coordinate of what follows. Jump past the insertion, capped at the last read base in case
    // nothing follows it.
    final CigarElement leadingInsertion = readStartsWithInsertion(cigar);
    final boolean clippingLeftTail = tail == ClippingTail.LEFT_TAIL;
    if (leadingInsertion != null && clippingLeftTail && coordinate == 0) {
      final int lastReadIndex = cigar.getReadLength() - 1;
      coordinate = Math.min(leadingInsertion.getLength(), lastReadIndex);
    }

    return coordinate;
  }
예제 #2
0
파일: ReadUtils.java 프로젝트: AriesLL/gatk
  /**
   * Computes the reference coordinate at which the read ends once trailing soft clips are counted
   * as part of the read. Hard clips are skipped and never contribute.
   *
   * <p>Note: getUnclippedEnd() adds both soft and hard clips; this method adds soft clips only.
   *
   * <p>If the cigar contains nothing but clipping operators (for example 64H14S), the plain
   * alignment end is returned.
   *
   * @param read the read
   * @param cigar the read's cigar, supplied by the caller for speed because getCigar is an
   *     expensive operation; most callers should use the overload that does not take the cigar
   * @return the end of the read extended by its trailing soft clips (but not hard clips)
   */
  public static int getSoftEnd(final GATKRead read, final Cigar cigar) {
    Utils.nonNull(read, "read");
    Utils.nonNull(cigar, "cigar");

    final int alignmentEnd = read.getEnd();
    final List<CigarElement> elements = cigar.getCigarElements();

    // Scan backwards from the last element, summing soft clips until a non-clip operator shows
    // up. Any soft clip seen here is assumed to sit at the end of the aligned read.
    int trailingSoftClip = 0;
    int index = elements.size();
    while (--index >= 0) {
      final CigarElement element = elements.get(index);
      final CigarOperator operator = element.getOperator();
      if (operator == CigarOperator.HARD_CLIP) {
        continue;
      }
      if (operator != CigarOperator.SOFT_CLIP) {
        // First non-clip operator from the end: the accumulated soft clip is a true tail.
        return alignmentEnd + trailingSoftClip;
      }
      trailingSoftClip += element.getLength();
    }

    // Only clipping operators were present, so the soft end equals the alignment end.
    return alignmentEnd;
  }
예제 #3
0
 /**
  * Checks whether the first operator after any leading hard or soft clips is an insertion.
  *
  * @param cigar the cigar to inspect
  * @return a pair of (true, the insertion element) when the read starts with an insertion, or
  *     (false, null) otherwise
  */
 public static Pair<Boolean, CigarElement> readStartsWithInsertion(final Cigar cigar) {
   CigarElement leadingInsertion = null;
   for (final CigarElement element : cigar.getCigarElements()) {
     final CigarOperator operator = element.getOperator();
     if (operator == CigarOperator.INSERTION) {
       leadingInsertion = element;
       break;
     }
     // Clips are skipped; anything else means the read does not open with an insertion.
     final boolean isClip =
         operator == CigarOperator.HARD_CLIP || operator == CigarOperator.SOFT_CLIP;
     if (!isClip) {
       break;
     }
   }
   return new Pair<Boolean, CigarElement>(leadingInsertion != null, leadingInsertion);
 }
 /**
  * Tells whether the current position is the first base of a cigar element that immediately
  * follows an element with the given operator.
  *
  * @param cigar the cigar being traversed
  * @param currentOperatorIndex index of the current element within the cigar
  * @param op the operator the preceding element is compared against
  * @param isFirstOp whether the current element is the first one (so nothing precedes it)
  * @param isFirstBaseOfOp whether the current position is the first base of the current element
  * @return true only when a preceding element exists, the position is the first base of the
  *     current element, and the preceding element's operator is {@code op}
  */
 private static boolean isAfterOp(
     final Cigar cigar,
     final int currentOperatorIndex,
     final CigarOperator op,
     final boolean isFirstOp,
     final boolean isFirstBaseOfOp) {
   // Bail out before touching the cigar: for the first element there is no predecessor to read.
   if (isFirstOp || !isFirstBaseOfOp) {
     return false;
   }
   final CigarOperator previousOperator =
       cigar.getCigarElement(currentOperatorIndex - 1).getOperator();
   return previousOperator == op;
 }
예제 #5
0
파일: ReadUtils.java 프로젝트: AriesLL/gatk
 /**
  * Looks for an insertion at the very beginning of a read.
  *
  * <p>Hard clips (H) in front of the insertion are always skipped. Soft clips (S) are skipped
  * only when {@code ignoreSoftClipOps} is set; otherwise a leading soft clip ends the search.
  *
  * @param cigarForRead the CIGAR to evaluate
  * @param ignoreSoftClipOps whether S operators should be skipped when deciding if an I operator
  *     is at the beginning
  * @return the leading insertion element, or null when the read does not start with one
  */
 public static CigarElement readStartsWithInsertion(
     final Cigar cigarForRead, final boolean ignoreSoftClipOps) {
   for (final CigarElement element : cigarForRead.getCigarElements()) {
     final CigarOperator operator = element.getOperator();
     if (operator == CigarOperator.INSERTION) {
       return element;
     }
     final boolean skippable =
         operator == CigarOperator.HARD_CLIP
             || (ignoreSoftClipOps && operator == CigarOperator.SOFT_CLIP);
     if (!skippable) {
       // A non-skippable operator precedes any insertion, so the read does not start with one.
       return null;
     }
   }
   return null;
 }
예제 #6
0
파일: ReadUtils.java 프로젝트: AriesLL/gatk
  /**
   * Computes the reference coordinate at which the read begins once leading soft clips are
   * counted as part of the read. Hard clips are skipped and never contribute.
   *
   * <p>Note: getUnclippedStart() adds both soft and hard clips; this method adds soft clips only.
   *
   * @param read the read
   * @param cigar the read's cigar, supplied by the caller for speed because getCigar is an
   *     expensive operation; most callers should use the overload that does not take the cigar
   * @return the start of the read extended by its leading soft clips (but not hard clips)
   */
  public static int getSoftStart(final GATKRead read, final Cigar cigar) {
    Utils.nonNull(read, "read");
    Utils.nonNull(cigar, "cigar");

    final int alignmentStart = read.getStart();

    // Sum the soft clips found before the first non-clip operator.
    int leadingSoftClip = 0;
    for (final CigarElement element : cigar.getCigarElements()) {
      final CigarOperator operator = element.getOperator();
      if (operator == CigarOperator.HARD_CLIP) {
        continue;
      }
      if (operator != CigarOperator.SOFT_CLIP) {
        break;
      }
      leadingSoftClip += element.getLength();
    }
    return alignmentStart - leadingSoftClip;
  }
예제 #7
0
파일: ReadUtils.java 프로젝트: AriesLL/gatk
  /**
   * Walks the cigar to translate a reference coordinate into a read coordinate.
   *
   * <p>The walk advances {@code refCoord - alignmentStart} reference bases through the cigar
   * elements. Soft clips are counted as if they consumed reference bases, so {@code
   * alignmentStart} is presumably the soft-clipped start of the read -- NOTE(review): confirm
   * against callers.
   *
   * <p>When the requested coordinate lands inside, or immediately before, a deletion or skipped
   * region, the returned read coordinate is backed up to the last read base before that region
   * and the boolean in the result is set so the caller can adjust.
   *
   * @param alignmentStart the coordinate the walk starts from
   * @param cigar the read's cigar
   * @param refCoord the requested reference coordinate
   * @param allowGoalNotReached if true, an unreachable coordinate yields {@code
   *     (CLIPPING_GOAL_NOT_REACHED, false)}; if false it raises a {@link GATKException}
   * @return a pair of (read coordinate, whether the coordinate fell inside or just before a
   *     deletion or skipped region)
   * @throws GATKException if the coordinate cannot be reached and {@code allowGoalNotReached} is
   *     false
   */
  private static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(
      final int alignmentStart,
      final Cigar cigar,
      final int refCoord,
      final boolean allowGoalNotReached) {
    // Running totals of read bases and reference bases consumed so far.
    int readBases = 0;
    int refBases = 0;
    boolean fallsInsideDeletionOrSkippedRegion = false;
    boolean endJustBeforeDeletionOrSkippedRegion = false;
    boolean fallsInsideOrJustBeforeDeletionOrSkippedRegion = false;

    final int goal = refCoord - alignmentStart; // The goal is to move this many reference bases
    if (goal < 0) {
      // The requested coordinate lies before the start of the walk.
      if (allowGoalNotReached) {
        return new MutablePair<>(CLIPPING_GOAL_NOT_REACHED, false);
      } else {
        throw new GATKException(
            "Somehow the requested coordinate is not covered by the read. Too many deletions?");
      }
    }
    // True immediately when goal == 0, in which case the loop below never runs.
    boolean goalReached = refBases == goal;

    final Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator();
    while (!goalReached && cigarElementIterator.hasNext()) {
      final CigarElement cigarElement = cigarElementIterator.next();
      // Number of reference bases taken from this element (stays 0 for elements that neither
      // consume reference bases nor are soft clips).
      int shift = 0;

      if (cigarElement.getOperator().consumesReferenceBases()
          || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
        // Take the whole element if that still falls short of the goal, otherwise only the
        // part needed to land exactly on the goal.
        if (refBases + cigarElement.getLength() < goal) {
          shift = cigarElement.getLength();
        } else {
          shift = goal - refBases;
        }

        refBases += shift;
      }
      goalReached = refBases == goal;

      // Elements passed over entirely contribute their full length in read bases.
      if (!goalReached && cigarElement.getOperator().consumesReadBases()) {
        readBases += cigarElement.getLength();
      }

      if (goalReached) {
        // Is this base's reference position within this cigar element? Or did we use it all?
        final boolean endsWithinCigar = shift < cigarElement.getLength();

        // If it isn't, we need to check the next one. There should *ALWAYS* be a next one
        // since we checked if the goal coordinate is within the read length, so this is just a
        // sanity check.
        if (!endsWithinCigar && !cigarElementIterator.hasNext()) {
          if (allowGoalNotReached) {
            return new MutablePair<>(CLIPPING_GOAL_NOT_REACHED, false);
          } else {
            throw new GATKException(
                String.format(
                    "Reference coordinate corresponds to a non-existent base in the read. This should never happen -- check read with alignment start: %s  and cigar: %s",
                    alignmentStart, cigar));
          }
        }

        // Only assigned when the goal falls exactly at the end of the current element.
        CigarElement nextCigarElement = null;

        // if we end inside the current cigar element, we just have to check if it is a deletion (or
        // skipped region)
        if (endsWithinCigar) {
          fallsInsideDeletionOrSkippedRegion =
              (cigarElement.getOperator() == CigarOperator.DELETION
                  || cigarElement.getOperator() == CigarOperator.SKIPPED_REGION);
        } // if we end outside the current cigar element, we need to check if the next element is an
          // insertion, deletion or skipped region.
        else {
          nextCigarElement = cigarElementIterator.next();

          // if it's an insertion, we need to clip the whole insertion before looking at the next
          // element
          if (nextCigarElement.getOperator() == CigarOperator.INSERTION) {
            readBases += nextCigarElement.getLength();
            // An insertion that is the last element leaves no base at the goal coordinate.
            if (!cigarElementIterator.hasNext()) {
              if (allowGoalNotReached) {
                return new MutablePair<>(CLIPPING_GOAL_NOT_REACHED, false);
              } else {
                throw new GATKException(
                    String.format(
                        "Reference coordinate corresponds to a non-existent base in the read. This should never happen -- check read with alignment start: %s  and cigar: %s",
                        alignmentStart, cigar));
              }
            }

            nextCigarElement = cigarElementIterator.next();
          }

          // if it's a deletion (or skipped region), we will pass the information on to be handled
          // downstream.
          endJustBeforeDeletionOrSkippedRegion =
              (nextCigarElement.getOperator() == CigarOperator.DELETION
                  || nextCigarElement.getOperator() == CigarOperator.SKIPPED_REGION);
        }

        fallsInsideOrJustBeforeDeletionOrSkippedRegion =
            endJustBeforeDeletionOrSkippedRegion || fallsInsideDeletionOrSkippedRegion;

        // If we reached our goal outside a deletion (or skipped region), add the shift
        if (!fallsInsideOrJustBeforeDeletionOrSkippedRegion
            && cigarElement.getOperator().consumesReadBases()) {
          readBases += shift;
        } // If we reached our goal just before a deletion (or skipped region) we need
        // to add the shift of the current cigar element but go back to its last element to return
        // the last
        // base before the deletion (or skipped region) (see warning in function contracts)
        else if (endJustBeforeDeletionOrSkippedRegion
            && cigarElement.getOperator().consumesReadBases()) {
          readBases += shift - 1;
        } // If we reached our goal inside a deletion (or skipped region), or just between a
          // deletion and a skipped region,
        // then we must backtrack to the last base before the deletion (or skipped region)
        else if (fallsInsideDeletionOrSkippedRegion
            || (endJustBeforeDeletionOrSkippedRegion
                && nextCigarElement.getOperator().equals(CigarOperator.N))
            || (endJustBeforeDeletionOrSkippedRegion
                && nextCigarElement.getOperator().equals(CigarOperator.D))) {
          readBases--;
        }
      }
    }

    // The cigar ran out before the requested number of reference bases was consumed.
    if (!goalReached) {
      if (allowGoalNotReached) {
        return new MutablePair<>(CLIPPING_GOAL_NOT_REACHED, false);
      } else {
        throw new GATKException(
            "Somehow the requested coordinate is not covered by the read. Alignment "
                + alignmentStart
                + " | "
                + cigar);
      }
    }

    return Pair.of(readBases, fallsInsideOrJustBeforeDeletionOrSkippedRegion);
  }
예제 #8
0
  /**
   * Walks the cigar to translate a reference coordinate into a read coordinate.
   *
   * <p>The walk advances {@code refCoord - alignmentStart} reference bases through the cigar
   * elements. Soft clips are counted as if they consumed reference bases, so {@code
   * alignmentStart} is presumably the soft-clipped start of the read -- NOTE(review): confirm
   * against callers.
   *
   * <p>When the requested coordinate lands inside a deletion, or exactly at the end of an element
   * that is followed by a deletion, the returned read coordinate is the last read base before the
   * deletion and the boolean in the result is true.
   *
   * @param alignmentStart the coordinate the walk starts from
   * @param cigar the read's cigar
   * @param refCoord the requested reference coordinate
   * @param allowGoalNotReached if true, an unreachable coordinate yields {@code
   *     (CLIPPING_GOAL_NOT_REACHED, false)}; if false it raises a ReviewedStingException
   * @return a pair of (read coordinate, whether the coordinate fell inside or just before a
   *     deletion)
   */
  public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(
      final int alignmentStart,
      final Cigar cigar,
      final int refCoord,
      final boolean allowGoalNotReached) {
    // Running totals of read bases and reference bases consumed so far.
    int readBases = 0;
    int refBases = 0;
    boolean fallsInsideDeletion = false;

    int goal = refCoord - alignmentStart; // The goal is to move this many reference bases
    if (goal < 0) {
      // The requested coordinate lies before the start of the walk.
      if (allowGoalNotReached) {
        return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false);
      } else {
        throw new ReviewedStingException(
            "Somehow the requested coordinate is not covered by the read. Too many deletions?");
      }
    }
    // True immediately when goal == 0, in which case the loop below never runs.
    boolean goalReached = refBases == goal;

    Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator();
    while (!goalReached && cigarElementIterator.hasNext()) {
      CigarElement cigarElement = cigarElementIterator.next();
      // Number of reference bases taken from this element (stays 0 for elements that neither
      // consume reference bases nor are soft clips).
      int shift = 0;

      if (cigarElement.getOperator().consumesReferenceBases()
          || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
        // Take the whole element if that still falls short of the goal, otherwise only the
        // part needed to land exactly on the goal.
        if (refBases + cigarElement.getLength() < goal) shift = cigarElement.getLength();
        else shift = goal - refBases;

        refBases += shift;
      }
      goalReached = refBases == goal;

      // Elements passed over entirely contribute their full length in read bases.
      if (!goalReached && cigarElement.getOperator().consumesReadBases())
        readBases += cigarElement.getLength();

      if (goalReached) {
        // Is this base's reference position within this cigar element? Or did we use it all?
        boolean endsWithinCigar = shift < cigarElement.getLength();

        // If it isn't, we need to check the next one. There should *ALWAYS* be a next one
        // since we checked if the goal coordinate is within the read length, so this is just a
        // sanity check.
        if (!endsWithinCigar && !cigarElementIterator.hasNext()) {
          if (allowGoalNotReached) {
            return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false);
          } else {
            throw new ReviewedStingException(
                "Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
          }
        }

        // Only assigned when the goal falls exactly at the end of the current element.
        CigarElement nextCigarElement;

        // if we end inside the current cigar element, we just have to check if it is a deletion
        if (endsWithinCigar)
          fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;

        // if we end outside the current cigar element, we need to check if the next element is an
        // insertion or deletion.
        else {
          nextCigarElement = cigarElementIterator.next();

          // if it's an insertion, we need to clip the whole insertion before looking at the next
          // element
          if (nextCigarElement.getOperator() == CigarOperator.INSERTION) {
            readBases += nextCigarElement.getLength();
            // An insertion that is the last element leaves no base at the goal coordinate.
            if (!cigarElementIterator.hasNext()) {
              if (allowGoalNotReached) {
                return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false);
              } else {
                throw new ReviewedStingException(
                    "Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
              }
            }

            nextCigarElement = cigarElementIterator.next();
          }

          // if it's a deletion, we will pass the information on to be handled downstream.
          fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION;
        }

        // If we reached our goal outside a deletion, add the shift
        if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
          readBases += shift;

        // If we reached our goal inside a deletion, but the deletion is the next cigar element,
        // then we need to add the shift of the current cigar element but go back to its last
        // element to return the last base before the deletion (see warning in function contracts)
        else if (fallsInsideDeletion && !endsWithinCigar) readBases += shift - 1;

        // If we reached our goal inside a deletion then we must backtrack to the last base before
        // the deletion
        else if (fallsInsideDeletion && endsWithinCigar) readBases--;
      }
    }

    // The cigar ran out before the requested number of reference bases was consumed.
    if (!goalReached) {
      if (allowGoalNotReached) {
        return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false);
      } else {
        throw new ReviewedStingException(
            "Somehow the requested coordinate is not covered by the read. Alignment "
                + alignmentStart
                + " | "
                + cigar);
      }
    }

    return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
  }