/**
 * Looks up the base quality of {@code read} at the read offset that corresponds to
 * {@code refLoc}.
 *
 * <p>The offset comes from
 * {@code ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead}, called with the
 * {@code RIGHT_TAIL} clipping tail.
 *
 * @param read the read whose base qualities are queried
 * @param refLoc the reference coordinate to translate into a read offset
 * @return the base quality found at that offset, widened to a double
 */
@Override
 protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) {
   // Fetch the qualities first, then resolve the offset, so both lookups happen in the same
   // order as a direct array-access expression would evaluate them.
   final byte[] baseQuals = read.getBaseQualities();
   final int readOffset =
       ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(
           read, refLoc, ReadUtils.ClippingTail.RIGHT_TAIL);
   return (double) baseQuals[readOffset];
 }
Esempio n. 2
0
  /**
   * Hard clips the bases of a read between {@code start} and {@code stop} (base coordinates,
   * both inclusive) and returns the result as a new read.
   *
   * <p>When {@code start == 0} the clip is taken from the front of the read; otherwise it is
   * taken from the right. For example, {@code start == 0} and {@code stop == 10} removes the
   * leading bases of the read up to and including index 10.
   *
   * <p>Reads with negative alignment starts are supported, so that reads whose soft clips were
   * reverted (and which may therefore start before position 0) can still be hard clipped.
   *
   * <p>Insertion/deletion base qualities, when present on the read, are trimmed alongside the
   * regular base qualities.
   *
   * @param read a non-null read
   * @param start a start >= 0 and < read.length
   * @param stop a stop >= 0 and < read.length.
   * @return a cloned version of read that has been properly trimmed down
   */
  private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) {

    // An unmapped read has no cigar, and we must not invent one: use an empty cigar with no
    // shift on either side.
    final CigarShift shift;
    if (read.getReadUnmappedFlag()) {
      shift = new CigarShift(new Cigar(), 0, 0);
    } else {
      shift = hardClipCigar(read.getCigar(), start, stop);
    }

    // Clipping the cigar may leave insertions at the start and/or end of the read; those are
    // dropped as well, which shortens the read further on the affected side(s).
    final int clippedSpan = stop - start + 1;
    final int keptLength =
        read.getReadLength() - clippedSpan - shift.shiftFromStart - shift.shiftFromEnd;
    final byte[] keptBases = new byte[keptLength];
    final byte[] keptQuals = new byte[keptLength];

    // Index of the first surviving base in the original arrays: past the clipped prefix when
    // clipping from the front, otherwise only past the cigar-induced start shift.
    int firstKept = shift.shiftFromStart;
    if (start == 0) {
      firstKept += stop + 1;
    }

    System.arraycopy(read.getReadBases(), firstKept, keptBases, 0, keptLength);
    System.arraycopy(read.getBaseQualities(), firstKept, keptQuals, 0, keptLength);

    final GATKSAMRecord clipped = (GATKSAMRecord) read.clone();

    // The cached soft start/end may no longer be valid after hard clipping. Drop them here;
    // they are recomputed lazily on the next getSoftStart()/getSoftEnd() call.
    clipped.resetSoftStartAndEnd();
    clipped.setBaseQualities(keptQuals);
    clipped.setReadBases(keptBases);
    clipped.setCigar(shift.cigar);
    if (start == 0) {
      // Clipping from the front moves the alignment start forward.
      final int oldAlignmentStart = read.getAlignmentStart();
      final int startShift = calculateAlignmentStartShift(read.getCigar(), shift.cigar);
      clipped.setAlignmentStart(oldAlignmentStart + startShift);
    }

    if (read.hasBaseIndelQualities()) {
      // Trim the insertion and deletion qualities over exactly the same window as the bases.
      final byte[] keptInsertionQuals = new byte[keptLength];
      final byte[] keptDeletionQuals = new byte[keptLength];
      System.arraycopy(
          read.getBaseInsertionQualities(), firstKept, keptInsertionQuals, 0, keptLength);
      System.arraycopy(
          read.getBaseDeletionQualities(), firstKept, keptDeletionQuals, 0, keptLength);
      clipped.setBaseQualities(keptInsertionQuals, EventType.BASE_INSERTION);
      clipped.setBaseQualities(keptDeletionQuals, EventType.BASE_DELETION);
    }

    return clipped;
  }
Esempio n. 3
0
  /**
   * Counts the bases to disregard at the end of a read, where "end" is the right end with
   * respect to the reference.
   *
   * <p>The count starts at the length of a trailing hard clip (if the last cigar element is one)
   * and is then extended by every consecutive base, scanning leftwards, whose quality is below
   * {@code PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD}.
   *
   * @param read a read containing the variant
   * @return number of hard clipped and low qual bases at the read end (where end is right end
   *     w.r.t. the reference)
   */
  public static int getNumClippedBasesAtEnd(final GATKSAMRecord read) {
    final Cigar cigar = read.getCigar();
    final CigarElement trailing = cigar.getCigarElement(cigar.numCigarElements() - 1);

    // Hard clips are never considered, so a trailing hard clip seeds the count.
    int clipped = (trailing.getOperator() == CigarOperator.H) ? trailing.getLength() : 0;

    final byte[] bases = read.getReadBases();
    final byte[] quals = read.getBaseQualities();

    // Clip more strictly than the CIGAR string does: the CIGAR may be too conservative and
    // leave a run of Q2 bases hanging off the read.
    // TODO: this code may not even get used because HaplotypeCaller already hard clips low
    // quality tails
    int pos = bases.length - clipped - 1;
    while (pos >= 0 && quals[pos] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD) {
      clipped++;
      pos--;
    }

    return clipped;
  }
Esempio n. 4
0
  /**
   * Clips the bases in a read according to this operation's start and stop, using the clipping
   * representation selected by the {@code algorithm} argument.
   *
   * <p>All work is done on a clone of {@code originalRead}; the clone (or a read derived from it
   * by the hard-clip / revert helpers) is what gets returned.
   *
   * @param algorithm clipping algorithm to use
   * @param originalRead the read to be clipped
   * @return the clipped read
   * @throws UserException if soft clipping is requested for an unmapped read
   * @throws RuntimeException if soft clipping is requested for a region in the middle of a read
   * @throws IllegalStateException if {@code algorithm} is not one of the handled representations
   */
  public GATKSAMRecord apply(ClippingRepresentation algorithm, GATKSAMRecord originalRead) {
    GATKSAMRecord read = (GATKSAMRecord) originalRead.clone();
    byte[] quals = read.getBaseQualities();
    byte[] bases = read.getReadBases();
    byte[] newBases = new byte[bases.length];
    byte[] newQuals = new byte[quals.length];

    switch (algorithm) {
        // important note:
        //   it's not safe to call read.getReadBases()[i] = 'N' or read.getBaseQualities()[i] = 0
        //   because you're not guaranteed to get a pointer to the actual array of bytes in the
        //   GATKSAMRecord. Fresh arrays are therefore built and handed to the setters instead.
      case WRITE_NS:
        // Replace the bases in [start, stop] with 'N'; qualities are left untouched.
        for (int i = 0; i < bases.length; i++) {
          if (i >= start && i <= stop) {
            newBases[i] = 'N';
          } else {
            newBases[i] = bases[i];
          }
        }
        read.setReadBases(newBases);
        break;
      case WRITE_Q0S:
        // Zero the qualities in [start, stop]; bases are left untouched.
        for (int i = 0; i < quals.length; i++) {
          if (i >= start && i <= stop) {
            newQuals[i] = 0;
          } else {
            newQuals[i] = quals[i];
          }
        }
        read.setBaseQualities(newQuals);
        break;
      case WRITE_NS_Q0S:
        // Both of the above: 'N' bases and zero qualities in [start, stop].
        for (int i = 0; i < bases.length; i++) {
          if (i >= start && i <= stop) {
            newQuals[i] = 0;
            newBases[i] = 'N';
          } else {
            newQuals[i] = quals[i];
            newBases[i] = bases[i];
          }
        }
        // The qualities must come from newQuals; passing newBases here would store base
        // letters as quality scores.
        read.setBaseQualities(newQuals);
        read.setReadBases(newBases);
        break;
      case HARDCLIP_BASES:
        read = hardClip(read, start, stop);
        break;

      case SOFTCLIP_BASES:
        if (read.getReadUnmappedFlag()) {
          // we can't process unmapped reads
          throw new UserException("Read Clipper cannot soft clip unmapped reads");
        }

        int myStop = stop;
        if ((stop + 1 - start) == read.getReadLength()) {
          // BAM representation issue -- we can't SOFTCLIP away all bases in a read, so keep the
          // last base by decrementing stop.
          myStop--;
        }

        // Soft clips can only sit at either end of a read, never in the middle.
        if (start > 0 && myStop != read.getReadLength() - 1)
          throw new RuntimeException(
              String.format(
                  "Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d",
                  read.getReadName(), start, myStop));

        Cigar oldCigar = read.getCigar();

        // scLeft/scRight are the boundaries handed to softClip: clipping from the front moves
        // the left boundary past myStop, clipping from the back moves the right boundary to start.
        int scLeft = 0, scRight = read.getReadLength();
        if (start == 0) scLeft = myStop + 1;
        else scRight = start;

        Cigar newCigar = softClip(oldCigar, scLeft, scRight);
        read.setCigar(newCigar);

        // Shift the alignment start by the offset derived from the old and new cigars.
        int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar);
        int newStart = read.getAlignmentStart() + newClippedStart;
        read.setAlignmentStart(newStart);

        break;

      case REVERT_SOFTCLIPPED_BASES:
        read = revertSoftClippedBases(read);
        break;

      default:
        throw new IllegalStateException("Unexpected Clipping operator type " + algorithm);
    }

    return read;
  }