示例#1
0
  /**
   * Returns a copy of the read in which soft-clipped bases are treated as aligned bases.
   *
   * <p>Each run of consecutive SOFT_CLIP and MATCH_OR_MISMATCH cigar elements is collapsed into a
   * single MATCH_OR_MISMATCH element whose length is the sum of the run; every other element is
   * copied through unchanged. The alignment start of the copy is the original start plus the value
   * returned by {@code calculateAlignmentStartShift} for the old and new cigars.
   *
   * @param read the read whose soft clips should be reverted
   * @return a clone of {@code read} carrying the rewritten cigar and alignment start; when the new
   *     start is {@code <= 0} the clone is additionally passed through {@code hardClip} from the
   *     front of the read
   */
  private GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
    final GATKSAMRecord reverted = (GATKSAMRecord) read.clone();

    final Cigar revertedCigar = new Cigar();
    int pendingMatchLength = 0;
    for (final CigarElement current : read.getCigar().getCigarElements()) {
      final CigarOperator op = current.getOperator();
      final boolean countsAsMatch =
          op == CigarOperator.SOFT_CLIP || op == CigarOperator.MATCH_OR_MISMATCH;

      if (countsAsMatch) {
        // keep accumulating; the merged element is emitted once the run ends
        pendingMatchLength += current.getLength();
        continue;
      }

      // a non-match element ends the current run: flush it before copying the element itself
      if (pendingMatchLength > 0) {
        revertedCigar.add(new CigarElement(pendingMatchLength, CigarOperator.MATCH_OR_MISMATCH));
        pendingMatchLength = 0;
      }
      revertedCigar.add(current);
    }

    // flush a run that extends to the end of the cigar
    if (pendingMatchLength > 0) {
      revertedCigar.add(new CigarElement(pendingMatchLength, CigarOperator.MATCH_OR_MISMATCH));
    }

    reverted.setCigar(revertedCigar);
    final int shiftedStart =
        read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), revertedCigar);
    reverted.setAlignmentStart(shiftedStart);

    // A start that falls before the contig cannot be represented: SAMRecord has no negative or 0
    // alignment starts (0 means unaligned), so the overhanging bases are hard clipped away.
    return (shiftedStart > 0) ? reverted : hardClip(reverted, 0, -shiftedStart);
  }
示例#2
0
  /**
   * Hard clips the bases in the inclusive range [start, stop] (read base coordinates) from a read.
   *
   * <p>When {@code start == 0} the bases are removed from the front of the read and the alignment
   * start is moved accordingly; otherwise they are removed from the right end. Because the range is
   * inclusive, {@code start == 0} and {@code stop == 9} removes the first 10 bases.
   *
   * <p>This method tolerates reads with negative alignment starts, so that reads whose soft clips
   * were reverted (and whose start may therefore have become negative) can be hard clipped.
   *
   * <p>Insertion and deletion base qualities, when present, are trimmed the same way as the regular
   * base qualities. Per the original documentation the method is also expected to work with reduced
   * reads; nothing in this method treats them specially.
   *
   * @param read a non-null read
   * @param start a start >= 0 and < read.length
   * @param stop a stop >= 0 and < read.length
   * @return a cloned version of read that has been trimmed down
   */
  private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) {
    final boolean clipsFromFront = (start == 0);

    // An unmapped read has no cigar string, and none should be invented for it.
    final CigarShift shift;
    if (read.getReadUnmappedFlag()) {
      shift = new CigarShift(new Cigar(), 0, 0);
    } else {
      shift = hardClipCigar(read.getCigar(), start, stop);
    }

    // Besides the requested range, the cigar may demand extra trimming on either side when the
    // clip would otherwise leave an insertion at the start or end of the read.
    final int requestedClipLength = stop - start + 1;
    final int keptLength =
        read.getReadLength() - requestedClipLength - shift.shiftFromStart - shift.shiftFromEnd;
    final byte[] keptBases = new byte[keptLength];
    final byte[] keptQuals = new byte[keptLength];

    // Offset of the first surviving base in the original arrays.
    int sourceOffset = shift.shiftFromStart;
    if (clipsFromFront) {
      sourceOffset += stop + 1;
    }

    System.arraycopy(read.getReadBases(), sourceOffset, keptBases, 0, keptLength);
    System.arraycopy(read.getBaseQualities(), sourceOffset, keptQuals, 0, keptLength);

    final GATKSAMRecord clipped = (GATKSAMRecord) read.clone();

    // The cached soft start/end may be stale after hard clipping; drop them and let the next call
    // to getSoftStart()/End() recompute them lazily.
    clipped.resetSoftStartAndEnd();
    clipped.setBaseQualities(keptQuals);
    clipped.setReadBases(keptBases);
    clipped.setCigar(shift.cigar);
    if (clipsFromFront) {
      final int startShift = calculateAlignmentStartShift(read.getCigar(), shift.cigar);
      clipped.setAlignmentStart(read.getAlignmentStart() + startShift);
    }

    if (read.hasBaseIndelQualities()) {
      final byte[] keptInsertionQuals = new byte[keptLength];
      final byte[] keptDeletionQuals = new byte[keptLength];
      System.arraycopy(
          read.getBaseInsertionQualities(), sourceOffset, keptInsertionQuals, 0, keptLength);
      System.arraycopy(
          read.getBaseDeletionQualities(), sourceOffset, keptDeletionQuals, 0, keptLength);
      clipped.setBaseQualities(keptInsertionQuals, EventType.BASE_INSERTION);
      clipped.setBaseQualities(keptDeletionQuals, EventType.BASE_DELETION);
    }

    return clipped;
  }
示例#3
0
  /**
   * Clips the bases in a clone of the read according to this operation's start and stop, using the
   * clipping representation selected by the algorithm argument.
   *
   * <p>The range [start, stop] is inclusive and expressed in read base coordinates.
   *
   * @param algorithm clipping algorithm to use
   * @param originalRead the read to be clipped
   * @return the clipped read: a clone of {@code originalRead}, or a read derived from that clone by
   *     {@code hardClip} / {@code revertSoftClippedBases}
   * @throws UserException if SOFTCLIP_BASES is requested for an unmapped read
   * @throws RuntimeException if SOFTCLIP_BASES is requested for a range that touches neither end
   *     of the read
   * @throws IllegalStateException if the algorithm is not one of the handled representations
   */
  public GATKSAMRecord apply(ClippingRepresentation algorithm, GATKSAMRecord originalRead) {
    GATKSAMRecord read = (GATKSAMRecord) originalRead.clone();
    byte[] quals = read.getBaseQualities();
    byte[] bases = read.getReadBases();
    byte[] newBases = new byte[bases.length];
    byte[] newQuals = new byte[quals.length];

    switch (algorithm) {
        // important note:
        //   it's not safe to call read.getReadBases()[i] = 'N' or read.getBaseQualities()[i] = 0
        //   because you're not guaranteed to get a pointer to the actual array of bytes in the
        //   GATKSAMRecord; fresh arrays are filled and handed to the setters instead
      case WRITE_NS:
        for (int i = 0; i < bases.length; i++) {
          if (i >= start && i <= stop) {
            newBases[i] = 'N';
          } else {
            newBases[i] = bases[i];
          }
        }
        read.setReadBases(newBases);
        break;

      case WRITE_Q0S:
        for (int i = 0; i < quals.length; i++) {
          if (i >= start && i <= stop) {
            newQuals[i] = 0;
          } else {
            newQuals[i] = quals[i];
          }
        }
        read.setBaseQualities(newQuals);
        break;

      case WRITE_NS_Q0S:
        for (int i = 0; i < bases.length; i++) {
          if (i >= start && i <= stop) {
            newQuals[i] = 0;
            newBases[i] = 'N';
          } else {
            newQuals[i] = quals[i];
            newBases[i] = bases[i];
          }
        }
        // the qualities must come from newQuals; passing newBases here would store base
        // characters as quality scores and lose the zeroed qualities
        read.setBaseQualities(newQuals);
        read.setReadBases(newBases);
        break;

      case HARDCLIP_BASES:
        read = hardClip(read, start, stop);
        break;

      case SOFTCLIP_BASES:
        {
          if (read.getReadUnmappedFlag()) {
            // we can't process unmapped reads
            throw new UserException("Read Clipper cannot soft clip unmapped reads");
          }

          int myStop = stop;
          if ((stop + 1 - start) == read.getReadLength()) {
            // BAM representation issue -- we can't SOFTCLIP away all bases in a read, so the
            // clipped range is shortened by one base
            myStop--;
          }

          if (start > 0 && myStop != read.getReadLength() - 1) {
            throw new RuntimeException(
                String.format(
                    "Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d",
                    read.getReadName(), start, myStop));
          }

          Cigar oldCigar = read.getCigar();

          // bases in [scLeft, scRight) stay unclipped: clipping from the front moves scLeft,
          // clipping from the tail moves scRight
          int scLeft = 0;
          int scRight = read.getReadLength();
          if (start == 0) {
            scLeft = myStop + 1;
          } else {
            scRight = start;
          }

          Cigar newCigar = softClip(oldCigar, scLeft, scRight);
          read.setCigar(newCigar);

          int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar);
          int newStart = read.getAlignmentStart() + newClippedStart;
          read.setAlignmentStart(newStart);

          break;
        }

      case REVERT_SOFTCLIPPED_BASES:
        read = revertSoftClippedBases(read);
        break;

      default:
        throw new IllegalStateException("Unexpected Clipping operator type " + algorithm);
    }

    return read;
  }