Exemplo n.º 1
0
  /**
   * Returns the coverage distribution of a single read within the desired region.
   *
   * <p>Note: This function counts DELETIONS as coverage (since the main purpose is to downsample
   * reads for variant regions, and deletions count as variants)
   *
   * @param read the read to get the coverage distribution of
   * @param startLocation the first reference coordinate of the region (inclusive)
   * @param stopLocation the last reference coordinate of the region (inclusive)
   * @return an array with the coverage of each position from startLocation to stopLocation
   */
  public static int[] getCoverageDistributionOfRead(
      GATKSAMRecord read, int startLocation, int stopLocation) {
    int[] coverage = new int[stopLocation - startLocation + 1];
    int refLocation = read.getSoftStart();
    for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
      switch (cigarElement.getOperator()) {
        case S:
        case M:
        case EQ:
        case N:
        case X:
        case D:
          for (int i = 0; i < cigarElement.getLength(); i++) {
            if (refLocation >= startLocation && refLocation <= stopLocation) {
              int baseCount =
                  read.isReducedRead()
                      ? read.getReducedCount(refLocation - read.getSoftStart())
                      : 1;
              coverage[refLocation - startLocation] +=
                  baseCount; // this may be a reduced read, so add the proper number of bases
            }
            refLocation++;
          }
          break;

        case P:
        case I:
        case H:
          break;
      }

      if (refLocation > stopLocation) break;
    }
    return coverage;
  }
Exemplo n.º 2
0
  /**
   * Creates a map with each event in the read (cigar operator) and the read coordinate where it
   * happened.
   *
   * <p>Example: D -> 2, 34, 75 I -> 55 S -> 0, 101 H -> 101
   *
   * @param read the read
   * @return a map with the properties described above. See example
   */
  public static Map<CigarOperator, ArrayList<Integer>> getCigarOperatorForAllBases(
      GATKSAMRecord read) {
    Map<CigarOperator, ArrayList<Integer>> events =
        new HashMap<CigarOperator, ArrayList<Integer>>();

    int position = 0;
    for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
      CigarOperator op = cigarElement.getOperator();
      if (op.consumesReadBases()) {
        ArrayList<Integer> list = events.get(op);
        if (list == null) {
          list = new ArrayList<Integer>();
          events.put(op, list);
        }
        for (int i = position; i < cigarElement.getLength(); i++) list.add(position++);
      } else {
        ArrayList<Integer> list = events.get(op);
        if (list == null) {
          list = new ArrayList<Integer>();
          events.put(op, list);
        }
        list.add(position);
      }
    }
    return events;
  }
Exemplo n.º 3
0
 /**
  * Returns the read coordinate corresponding to the requested reference coordinate.
  *
  * <p>WARNING: if the requested reference coordinate happens to fall inside a deletion in the
  * read, this function will return the last read base before the deletion. This function returns a
  * Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use
  * when faced with a deletion.
  *
  * <p>SUGGESTION: Use getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail)
  * instead to get a pre-processed result according to normal clipping needs. Or you can use this
  * function and tailor the behavior to your needs.
  *
  * @param read
  * @param refCoord
  * @return the read coordinate corresponding to the requested reference coordinate. (see warning!)
  */
 @Requires({"refCoord >= read.getSoftStart()", "refCoord <= read.getSoftEnd()"})
 @Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
 public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(
     GATKSAMRecord read, int refCoord) {
   return getReadCoordinateForReferenceCoordinate(
       read.getSoftStart(), read.getCigar(), refCoord, false);
 }
Exemplo n.º 4
0
 /**
  * Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) to
  * take care of two corner cases:
  *
  * <p>1. If clipping the right tail (end of the read) getReadCoordinateForReferenceCoordinate and
  * fall inside a deletion return the base after the deletion. If clipping the left tail (beginning
  * of the read) it doesn't matter because it already returns the previous base by default.
  *
  * <p>2. If clipping the left tail (beginning of the read) getReadCoordinateForReferenceCoordinate
  * and the read starts with an insertion, and you're requesting the first read based coordinate,
  * it will skip the leading insertion (because it has the same reference coordinate as the
  * following base).
  *
  * @param read
  * @param refCoord
  * @param tail
  * @return the read coordinate corresponding to the requested reference coordinate for clipping.
  */
 @Requires({
   "refCoord >= read.getUnclippedStart()",
   "refCoord <= read.getUnclippedEnd() || (read.getUnclippedEnd() < read.getUnclippedStart())"
 })
 @Ensures({"result >= 0", "result < read.getReadLength()"})
 public static int getReadCoordinateForReferenceCoordinate(
     GATKSAMRecord read, int refCoord, ClippingTail tail) {
   return getReadCoordinateForReferenceCoordinate(
       read.getSoftStart(), read.getCigar(), refCoord, tail, false);
 }
Exemplo n.º 5
0
  /**
   * Calculates the reference coordinate for a read coordinate
   *
   * @param read the read
   * @param offset the base in the read (coordinate in the read)
   * @return the reference coordinate correspondent to this base
   */
  public static long getReferenceCoordinateForReadCoordinate(GATKSAMRecord read, int offset) {
    if (offset > read.getReadLength())
      throw new ReviewedStingException(
          String.format(OFFSET_OUT_OF_BOUNDS_EXCEPTION, offset, read.getReadLength()));

    long location = read.getAlignmentStart();
    Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
    while (offset > 0 && cigarElementIterator.hasNext()) {
      CigarElement cigarElement = cigarElementIterator.next();
      long move = 0;
      if (cigarElement.getOperator().consumesReferenceBases())
        move = (long) Math.min(cigarElement.getLength(), offset);
      location += move;
      offset -= move;
    }
    if (offset > 0 && !cigarElementIterator.hasNext())
      throw new ReviewedStingException(OFFSET_NOT_ZERO_EXCEPTION);

    return location;
  }
Exemplo n.º 6
0
 /**
  * Checks if a read starts with an insertion. It looks beyond Hard and Soft clips if there are
  * any.
  *
  * @param read
  * @return A pair with the answer (true/false) and the element or null if it doesn't exist
  */
 public static Pair<Boolean, CigarElement> readStartsWithInsertion(GATKSAMRecord read) {
   return readStartsWithInsertion(read.getCigar());
 }
Exemplo n.º 7
0
 /**
  * Is this read all insertion?
  *
  * @param read
  * @return whether or not the only element in the cigar string is an Insertion
  */
 public static boolean readIsEntirelyInsertion(GATKSAMRecord read) {
   for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
     if (cigarElement.getOperator() != CigarOperator.INSERTION) return false;
   }
   return true;
 }