Example #1
0
  /**
   * Returns a copy of the read in which every soft clip has been turned back into aligned bases.
   *
   * <p>Consecutive SOFT_CLIP and MATCH_OR_MISMATCH elements are fused into a single
   * MATCH_OR_MISMATCH element; every other element is carried over unchanged. The alignment start
   * of the copy is the original start plus the value of {@code calculateAlignmentStartShift} for
   * the original and rewritten cigars.
   *
   * @param read the read whose soft clips should be reverted; it is cloned, not modified here
   * @return the unclipped copy, or the result of hard clipping that copy when its new alignment
   *     start would be zero or negative
   */
  private GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
    final GATKSAMRecord unclipped = (GATKSAMRecord) read.clone();
    final Cigar originalCigar = read.getCigar();

    final Cigar unclippedCigar = new Cigar();
    int pendingMatchLength = 0; // length of the S/M run not yet written out
    for (final CigarElement current : originalCigar.getCigarElements()) {
      final CigarOperator operator = current.getOperator();
      if (operator == CigarOperator.SOFT_CLIP || operator == CigarOperator.MATCH_OR_MISMATCH) {
        pendingMatchLength += current.getLength();
        continue;
      }
      // Any other operator ends the current run: flush it before copying the element itself.
      if (pendingMatchLength > 0) {
        unclippedCigar.add(new CigarElement(pendingMatchLength, CigarOperator.MATCH_OR_MISMATCH));
        pendingMatchLength = 0;
      }
      unclippedCigar.add(current);
    }
    if (pendingMatchLength > 0) {
      unclippedCigar.add(new CigarElement(pendingMatchLength, CigarOperator.MATCH_OR_MISMATCH));
    }

    unclipped.setCigar(unclippedCigar);
    final int newStart =
        read.getAlignmentStart() + calculateAlignmentStartShift(originalCigar, unclippedCigar);
    unclipped.setAlignmentStart(newStart);

    if (newStart > 0) {
      return unclipped;
    }
    // The unclipped read would begin before the contig. A SAMRecord cannot carry a negative or
    // zero alignment start (0 means unaligned), so the overhanging bases are hard clipped away.
    return hardClip(unclipped, 0, -newStart);
  }
Example #2
0
  /**
   * Removes deletions and skipped regions (N) that a hard clip left at the very start or very end
   * of a cigar. At each end, the lengths of the leading hard clip, deletion and skipped-region
   * elements are summed and replaced by one hard clip element placed next to the first element of
   * any other kind.
   *
   * @param cigar the original cigar
   * @return an object with the shifts (see CigarShift class)
   */
  private CigarShift cleanHardClippedCigar(final Cigar cigar) {
    final Cigar cleanCigar = new Cigar();
    final Stack<CigarElement> endCleaned = new Stack<CigarElement>();
    Stack<CigarElement> pending = new Stack<CigarElement>();
    for (final CigarElement element : cigar.getCigarElements()) {
      pending.push(element);
    }

    int shiftFromStart = 0;
    int shiftFromEnd = 0;

    // Pass 1 pops the elements end-to-start and cleans the end of the cigar, pushing its output
    // onto a second stack. Pass 2 pops that stack, so it walks start-to-end, cleans the start and
    // writes the final cigar.
    for (int pass = 1; pass <= 2; pass++) {
      final boolean cleaningEnd = pass == 1;
      final int shift = 0; // nothing in this method ever changes the shift
      int absorbedLength = 0;
      boolean readHasStarted = false;
      boolean hardClipEmitted = false;

      while (!pending.empty()) {
        final CigarElement current = pending.pop();

        if (!readHasStarted) {
          final CigarOperator operator = current.getOperator();
          final boolean absorbable =
              operator == CigarOperator.HARD_CLIP
                  || operator == CigarOperator.DELETION
                  || operator == CigarOperator.SKIPPED_REGION;
          if (absorbable) {
            absorbedLength += current.getLength();
          } else {
            readHasStarted = true;
          }
        }
        if (!readHasStarted) {
          continue;
        }

        // The merged hard clip goes in exactly once, right before the first kept element.
        if (!hardClipEmitted) {
          hardClipEmitted = true;
          if (absorbedLength > 0) {
            final CigarElement merged = new CigarElement(absorbedLength, CigarOperator.HARD_CLIP);
            if (cleaningEnd) {
              endCleaned.push(merged);
            } else {
              cleanCigar.add(merged);
            }
          }
        }
        if (cleaningEnd) {
          endCleaned.push(current);
        } else {
          cleanCigar.add(current);
        }
      }

      if (cleaningEnd) {
        shiftFromEnd = shift;
        pending = endCleaned;
      } else {
        shiftFromStart = shift;
      }
    }
    return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd);
  }
Example #3
0
  /**
   * Builds a new cigar in which a stretch of read bases is replaced by a hard clip, then passes it
   * through {@code cleanHardClippedCigar}.
   *
   * <p>Two cases are handled. When {@code start == 0} the clip covers the beginning of the cigar up
   * to read offset {@code stop}. Otherwise the clip runs from read offset {@code start} to the end
   * of the cigar; in that case {@code stop} is only used to seed the hard clip length.
   *
   * @param cigar the cigar to clip; the {@code @Requires} contract says it must not be empty
   * @param start first read offset to clip (the initial count of {@code stop - start + 1} suggests
   *     an inclusive, 0-based offset -- TODO confirm against callers)
   * @param stop last read offset to clip (presumably inclusive -- TODO confirm against callers)
   * @return the value returned by {@code cleanHardClippedCigar} for the newly built cigar
   * @throws ReviewedGATKException if {@code start == 0} and the cigar holds nothing but hard clips
   */
  @Requires({"!cigar.isEmpty()"})
  private CigarShift hardClipCigar(Cigar cigar, int start, int stop) {
    Cigar newCigar = new Cigar();
    // number of read bases consumed by the cigar elements walked so far
    int index = 0;
    // length of the hard clip element to emit; existing hard clips get merged into it below
    int totalHardClipCount = stop - start + 1;
    // NOTE(review): accumulated from calculateHardClippingAlignmentShift, which is defined
    // elsewhere; presumably the reference-position change caused by clipping indels -- confirm.
    int alignmentShift = 0; // caused by hard clipping deletions

    // hard clip the beginning of the cigar string
    if (start == 0) {
      Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator();
      CigarElement cigarElement = cigarElementIterator.next();
      // Skip all leading hard clips, folding their lengths into the hard clip we will emit
      while (cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
        totalHardClipCount += cigarElement.getLength();
        if (cigarElementIterator.hasNext()) cigarElement = cigarElementIterator.next();
        else
          throw new ReviewedGATKException(
              "Read is entirely hardclipped, shouldn't be trying to clip it's cigar string");
      }
      // keep clipping until we hit stop
      while (index <= stop) {
        // read bases this element contributes; 0 for operators that consume no read bases
        int shift = 0;
        if (cigarElement.getOperator().consumesReadBases()) shift = cigarElement.getLength();

        // this element ends exactly on the last clipped base: emit the hard clip now
        if (index + shift == stop + 1) {
          alignmentShift +=
              calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
          newCigar.add(
              new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
        }
        // element goes beyond what we need to clip: emit the hard clip followed by the part of
        // this element that survives
        else if (index + shift > stop + 1) {
          int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
          alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop - index + 1);
          newCigar.add(
              new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
          newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
        }
        index += shift;
        // NOTE(review): this runs for every element, including one already accounted for in the
        // two branches above; by then the hard clip element has been added, so the extra amount
        // does not affect its length -- confirm this is intended.
        alignmentShift += calculateHardClippingAlignmentShift(cigarElement, shift);

        if (index <= stop && cigarElementIterator.hasNext())
          cigarElement = cigarElementIterator.next();
        else break;
      }

      // add the remaining cigar elements (copied, not shared with the input cigar)
      while (cigarElementIterator.hasNext()) {
        cigarElement = cigarElementIterator.next();
        newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator()));
      }
    }

    // hard clip the end of the cigar string
    else {
      Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator();
      CigarElement cigarElement = cigarElementIterator.next();

      // Keep marching on until we find the start
      while (index < start) {
        // read bases this element contributes; 0 for operators that consume no read bases
        int shift = 0;
        if (cigarElement.getOperator().consumesReadBases()) shift = cigarElement.getLength();

        // we haven't gotten to the start yet, keep everything as is.
        if (index + shift < start)
          newCigar.add(new CigarElement(cigarElement.getLength(), cigarElement.getOperator()));

        // element reaches or goes beyond our clip starting position: keep only its first
        // (start - index) bases
        else {
          int elementLengthAfterChopping = start - index;
          alignmentShift +=
              calculateHardClippingAlignmentShift(
                  cigarElement, cigarElement.getLength() - (start - index));

          // if this last element is a HARD CLIP operator, just merge it with our hard clip operator
          // to be added later
          if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
            totalHardClipCount += elementLengthAfterChopping;
          // otherwise, maintain what's left of this last operator
          else
            newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
        }
        index += shift;
        if (index < start && cigarElementIterator.hasNext())
          cigarElement = cigarElementIterator.next();
        else break;
      }

      // every remaining element is clipped away; walk them only to collect the alignment shift
      // (check if we are hard clipping indels) and the length of any existing hard clip
      while (cigarElementIterator.hasNext()) {
        cigarElement = cigarElementIterator.next();
        alignmentShift +=
            calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());

        // if the read had a HardClip operator in the end, combine it with the Hard Clip we are
        // adding
        if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
          totalHardClipCount += cigarElement.getLength();
      }
      // a single trailing hard clip stands in for everything that was clipped
      newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
    }
    // strip any deletion / skipped-region elements left dangling at the ends of the new cigar
    return cleanHardClippedCigar(newCigar);
  }
Example #4
0
  /**
   * Given a cigar string, soft clip up to startClipEnd and soft clip starting at endClipBegin.
   *
   * <p>Positions are offsets in read bases: only operators that consume read bases advance the
   * position. Read bases at offsets in {@code [__startClipEnd, __endClipBegin)} keep their
   * operator; read bases outside that window become SOFT_CLIP. Hard clips are always preserved.
   * Other elements that consume no read bases are kept only when they lie strictly inside the
   * window. Adjacent elements with the same operator are merged in the result.
   *
   * <p>When {@code __endClipBegin <= __startClipEnd} the whole read is soft clipped. In that case
   * only read-consuming elements contribute to the soft clip, so the clipped length matches the
   * number of read bases, and hard clips are kept as they are.
   *
   * @param __cigar the cigar to clip
   * @param __startClipEnd read offset at which the leading soft clip ends (exclusive)
   * @param __endClipBegin read offset at which the trailing soft clip begins (inclusive)
   * @return a new cigar with the requested soft clips applied
   */
  private Cigar softClip(final Cigar __cigar, final int __startClipEnd, final int __endClipBegin) {
    final Vector<CigarElement> newElements = new Vector<CigarElement>();

    if (__endClipBegin <= __startClipEnd) {
      // whole thing should be soft clipped. Deletions, skipped regions and padding carry no read
      // bases, so they are dropped instead of inflating the soft clip length.
      for (final CigarElement elem : __cigar.getCigarElements()) {
        final CigarOperator op = elem.getOperator();
        if (op == CigarOperator.HARD_CLIP) {
          newElements.add(new CigarElement(elem.getLength(), op));
        } else if (op.consumesReadBases()) {
          newElements.add(new CigarElement(elem.getLength(), CigarOperator.SOFT_CLIP));
        }
      }
    } else {
      int curLength = 0; // read bases consumed by the elements seen so far
      for (final CigarElement curElem : __cigar.getCigarElements()) {
        if (!curElem.getOperator().consumesReadBases()) {
          if (curElem.getOperator() == CigarOperator.HARD_CLIP
              || (curLength > __startClipEnd && curLength < __endClipBegin)) {
            newElements.add(new CigarElement(curElem.getLength(), curElem.getOperator()));
          }
          continue;
        }

        final int s = curLength;
        final int e = curLength + curElem.getLength();
        if (e <= __startClipEnd || s >= __endClipBegin) {
          // must turn this entire thing into a clip
          newElements.add(new CigarElement(curElem.getLength(), CigarOperator.SOFT_CLIP));
        } else if (s >= __startClipEnd && e <= __endClipBegin) {
          // entirely inside the kept window: same thing
          newElements.add(new CigarElement(curElem.getLength(), curElem.getOperator()));
        } else {
          // we are clipping in the middle of this guy
          final int clippedAtStart = s < __startClipEnd ? __startClipEnd - s : 0;
          final int clippedAtEnd = e > __endClipBegin ? e - __endClipBegin : 0;
          final int midLength = curElem.getLength() - clippedAtStart - clippedAtEnd;
          assert midLength >= 0;

          if (clippedAtStart > 0) {
            newElements.add(new CigarElement(clippedAtStart, CigarOperator.SOFT_CLIP));
          }
          if (midLength > 0) {
            newElements.add(new CigarElement(midLength, curElem.getOperator()));
          }
          if (clippedAtEnd > 0) {
            newElements.add(new CigarElement(clippedAtEnd, CigarOperator.SOFT_CLIP));
          }
        }
        curLength += curElem.getLength();
      }
    }

    // merge runs of adjacent elements that share an operator
    final Vector<CigarElement> finalNewElements = new Vector<CigarElement>();
    CigarElement lastElement = null;
    for (final CigarElement elem : newElements) {
      if (lastElement == null || lastElement.getOperator() != elem.getOperator()) {
        if (lastElement != null) {
          finalNewElements.add(lastElement);
        }
        lastElement = elem;
      } else {
        lastElement =
            new CigarElement(lastElement.getLength() + elem.getLength(), lastElement.getOperator());
      }
    }
    if (lastElement != null) {
      finalNewElements.add(lastElement);
    }

    final Cigar newCigar = new Cigar(finalNewElements);
    assert newCigar.isValid(null, -1) == null;
    return newCigar;
  }