示例#1
0
  /**
   * Computes the reference coordinate at which the read ends when trailing soft clips are counted
   * as part of the read, while hard clips are ignored.
   *
   * <p>Note: getUnclippedEnd() extends the end by both soft and hard clips; this method extends it
   * by soft clips only.
   *
   * @param read the read (validated with Utils.nonNull)
   * @param cigar the read's cigar (validated with Utils.nonNull). It is taken as an argument for
   *     speed, because getCigar is an expensive operation. Most callers should use the overload
   *     that does not take the cigar.
   * @return the alignment end plus the total length of the soft clips found to the right of the
   *     last non-clip element, or the plain alignment end when the cigar contains only clips
   */
  public static int getSoftEnd(final GATKRead read, final Cigar cigar) {
    Utils.nonNull(read, "read");
    Utils.nonNull(cigar, "cigar");

    final int alignmentEnd = read.getEnd();
    final List<CigarElement> elements = cigar.getCigarElements();

    // Walk in from the right edge, summing soft clips and stepping over hard clips, until the
    // first element that is neither kind of clip.
    int trailingSoftClip = 0;
    int index = elements.size() - 1;
    while (index >= 0) {
      final CigarElement element = elements.get(index);
      final CigarOperator operator = element.getOperator();
      if (operator == CigarOperator.SOFT_CLIP) {
        trailingSoftClip += element.getLength();
      } else if (operator != CigarOperator.HARD_CLIP) {
        // Every soft clip counted so far sits to the right of this element.
        return alignmentEnd + trailingSoftClip;
      }
      index--;
    }

    // Nothing but clips was found (for example 64H14S): the soft end is the alignment end.
    return alignmentEnd;
  }
示例#2
0
 /**
  * Looks up the original base qualities of a read, as recorded in its OQ attribute (the attribute
  * named by ORIGINAL_BASE_QUALITIES_TAG).
  *
  * @param read read to check
  * @return the decoded original base qualities, or null when the attribute is absent or holds an
  *     empty string
  */
 public static byte[] getOriginalBaseQualities(final GATKRead read) {
   if (read.hasAttribute(ORIGINAL_BASE_QUALITIES_TAG)) {
     final String encodedQuals = read.getAttributeAsString(ORIGINAL_BASE_QUALITIES_TAG);
     if (encodedQuals.length() > 0) {
       return SAMUtils.fastqToPhred(encodedQuals);
     }
   }
   return null;
 }
示例#3
0
  /**
   * Looks up the index, within the given header, of the contig to which the read's mate is
   * aligned.
   *
   * @param read read whose mate's reference index to look up
   * @param header SAM header defining contig indices
   * @return the header's sequence index for the mate's contig, or {@link
   *     SAMRecord#NO_ALIGNMENT_REFERENCE_INDEX} if the read's mate is unmapped
   */
  public static int getMateReferenceIndex(final GATKRead read, final SAMFileHeader header) {
    return read.mateIsUnmapped()
        ? SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX
        : header.getSequenceIndex(read.getMateContig());
  }
示例#4
0
 /**
  * Returns the base qualities for the read as a string.
  *
  * @param read read whose base qualities should be returned (validated with Utils.nonNull)
  * @return {@link SAMRecord#NULL_QUALS_STRING} when the read's qualities equal {@link
  *     SAMRecord#NULL_QUALS}; otherwise the qualities as printable ASCII values (encoded as a
  *     FASTQ string)
  */
 public static String getBaseQualityString(final GATKRead read) {
   Utils.nonNull(read);
   // Fetch the qualities once and reuse them for both the comparison and the encoding, rather
   // than asking the read for them twice.
   final byte[] quals = read.getBaseQualities();
   if (Arrays.equals(SAMRecord.NULL_QUALS, quals)) {
     return SAMRecord.NULL_QUALS_STRING;
   }
   return SAMUtils.phredToFastq(quals);
 }
示例#5
0
 /**
  * If a read's cigar ends in an INSERTION element, returns that element's length.
  *
  * <p>Warning: If the read has Hard or Soft clips after the insertion this function will return 0,
  * because only the very last cigar element is inspected.
  *
  * @param read the read whose cigar is inspected
  * @return the length of the last insertion, or 0 if the cigar has no elements or its last
  *     element is not an insertion (see warning)
  */
 public static int getLastInsertionOffset(final GATKRead read) {
   // Fetch the cigar a single time instead of once per use.
   final Cigar cigar = read.getCigar();
   final int elementCount = cigar.numCigarElements();
   if (elementCount == 0) {
     // An empty cigar has no last element; asking for index -1 would throw.
     return 0;
   }
   final CigarElement lastElement = cigar.getCigarElement(elementCount - 1);
   return lastElement.getOperator() == CigarOperator.I ? lastElement.getLength() : 0;
 }
示例#6
0
  /**
   * Sets a read's base qualities from a string of ASCII encoded values.
   *
   * <p>The string {@link SAMRecord#NULL_QUALS_STRING} is mapped to {@link SAMRecord#NULL_QUALS};
   * any other string is decoded with SAMUtils.fastqToPhred.
   *
   * @param read read whose base qualities should be set (validated with Utils.nonNull)
   * @param baseQualityString ASCII encoded (encoded as a FASTQ string) values of base qualities
   *     (validated with Utils.nonNull)
   */
  public static void setBaseQualityString(final GATKRead read, final String baseQualityString) {
    Utils.nonNull(read);
    Utils.nonNull(baseQualityString);

    final byte[] decodedQuals =
        SAMRecord.NULL_QUALS_STRING.equals(baseQualityString)
            ? SAMRecord.NULL_QUALS
            : SAMUtils.fastqToPhred(baseQualityString);
    read.setBaseQualities(decodedQuals);
  }
示例#7
0
 /**
  * Checks that a read's alignment is consistent with the contents of the header.
  *
  * <p>A mapped read fails the check when its reference index resolves to {@link
  * SAMRecord#NO_ALIGNMENT_REFERENCE_INDEX}, or when its start lies beyond the length of its contig
  * as recorded in the header. An unmapped read always passes.
  *
  * @param header The SAM file header.
  * @param read The read to verify.
  * @return true if alignment agrees with header, false otherwise.
  */
 public static boolean alignmentAgreesWithHeader(final SAMFileHeader header, final GATKRead read) {
   final int contigIndex = getReferenceIndex(read, header);
   final boolean mapped = !read.isUnmapped();

   // A mapped read whose contig cannot be resolved is aligned to a nonexistent contig.
   if (mapped && contigIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
     return false;
   }

   final SAMSequenceRecord contigRecord = header.getSequence(contigIndex);
   if (!mapped) {
     // Unmapped reads have no alignment to contradict the header.
     return true;
   }
   // The read must not start after the end of its contig.
   return read.getStart() <= contigRecord.getSequenceLength();
 }
示例#8
0
  /**
   * Finds the length of the longest read in the given list.
   *
   * @param reads list of reads
   * @return the largest value of getLength() over the list, never less than 0; 0 for an empty
   *     list
   * @throws IllegalArgumentException if the list itself is null
   */
  public static int getMaxReadLength(final List<GATKRead> reads) {
    if (reads == null) {
      throw new IllegalArgumentException("Attempting to check a null list of reads.");
    }

    // Folding from an identity of 0 keeps the result non-negative and yields 0 for an empty list.
    return reads.stream().mapToInt(GATKRead::getLength).reduce(0, Math::max);
  }
示例#9
0
 /**
  * Locates the adaptor boundary next to the read: the first base inside the adaptor that is
  * closest to the read boundary. For a read on the positive strand this is the first base after
  * the end of the fragment (Picard calls it 'insert'); for a read on the negative strand it is
  * the first base before the beginning of the fragment.
  *
  * <p>Two cases are handled:
  *
  * <p>1) The read is on the reverse strand:
  *
  * <p>{@code <----------------------| * |--------------------->}
  *
  * <p>The adaptor boundary is the mate start minus one.
  *
  * <p>2) The read is on the forward strand:
  *
  * <p>{@code |----------------------> * <----------------------|}
  *
  * <p>The adaptor boundary is the start of the read plus the absolute inferred insert size, plus
  * one.
  *
  * @param read the read being tested for the adaptor boundary
  * @return the reference coordinate of the adaptor boundary (effectively the first base IN the
  *     adaptor, closest to the read), or CANNOT_COMPUTE_ADAPTOR_BOUNDARY when
  *     hasWellDefinedFragmentSize(read) is false (for example an unmapped read or mate)
  */
 public static int getAdaptorBoundary(final GATKRead read) {
   if (!hasWellDefinedFragmentSize(read)) {
     return CANNOT_COMPUTE_ADAPTOR_BOUNDARY;
   }

   if (read.isReverseStrand()) {
     // Case 1: the boundary sits just before the mate's start.
     return read.getMateStart() - 1;
   }

   // Case 2: the inferred insert size can be negative if the mate is mapped before the read, so
   // its absolute value is used.
   final int fragmentSpan = Math.abs(read.getFragmentLength());
   return read.getStart() + fragmentSpan + 1;
 }
示例#10
0
 /**
  * Restores a read's base qualities to the original (pre-BQSR) ones when those are available.
  *
  * @param read the read to update in place
  * @return the same read instance that was passed in; its qualities are left untouched when no
  *     original qualities are stored
  */
 public static GATKRead resetOriginalBaseQualities(final GATKRead read) {
   final byte[] preRecalibrationQuals = ReadUtils.getOriginalBaseQualities(read);
   if (preRecalibrationQuals == null) {
     // Nothing stored to restore from.
     return read;
   }
   read.setBaseQualities(preRecalibrationQuals);
   return read;
 }
示例#11
0
 /**
  * Selects the quality array of a read that corresponds to the given error model.
  *
  * @param read the read to query
  * @param errorModel the kind of event whose qualities are wanted
  * @return read.getBaseQualities() for BASE_SUBSTITUTION, getBaseInsertionQualities(read) for
  *     BASE_INSERTION, or getBaseDeletionQualities(read) for BASE_DELETION
  * @throws GATKException if the error model is none of the three handled values
  */
 public static byte[] getBaseQualities(final GATKRead read, final EventType errorModel) {
   final byte[] selectedQuals;
   switch (errorModel) {
     case BASE_SUBSTITUTION:
       selectedQuals = read.getBaseQualities();
       break;
     case BASE_INSERTION:
       selectedQuals = getBaseInsertionQualities(read);
       break;
     case BASE_DELETION:
       selectedQuals = getBaseDeletionQualities(read);
       break;
     default:
       throw new GATKException("Unrecognized Base Recalibration type: " + errorModel);
   }
   return selectedQuals;
 }
示例#12
0
 /**
  * Default utility to query the base deletion qualities of a read. If the read has none, a new
  * array with one entry per base quality is created and filled with
  * DEFAULT_INSERTION_DELETION_QUAL. The new array is only returned; this method does not store it
  * on the read.
  *
  * @param read the read to query
  * @return the read's existing base deletion qualities, or a freshly built default array
  */
 public static byte[] getBaseDeletionQualities(final GATKRead read) {
   final byte[] storedQuals = getExistingBaseDeletionQualities(read);
   if (storedQuals != null) {
     return storedQuals;
   }

   // Some day, when base insertion and base deletion quals exist, the samtools API will be
   // updated and the original quals will be pulled here. For now the original quality is assumed
   // to be flat (Q45 according to the original comment).
   final byte[] defaultQuals = new byte[read.getBaseQualities().length];
   Arrays.fill(defaultQuals, DEFAULT_INSERTION_DELETION_QUAL);
   return defaultQuals;
 }
示例#13
0
  /**
   * Builds an "empty" read modelled on another read. The result starts as a copy of the given
   * read and then has these fields replaced by empty (non-null) values:
   *
   * <ul>
   *   <li>Cigar string
   *   <li>Read bases
   *   <li>Base qualities
   * </ul>
   *
   * <p>All attributes are cleared, after which the source read's read group (if it has one) is
   * restored. Anything else carried by the copy (the original documentation mentions mate
   * information) is not modified here.
   *
   * @param read a read to copy fields from
   * @return a read with no bases but safe for the GATK
   */
  public static GATKRead emptyRead(final GATKRead read) {
    final GATKRead blankCopy = read.copy();

    // Blank out alignment and sequence data with empty, non-null values.
    blankCopy.setCigar("");
    blankCopy.setBases(new byte[0]);
    blankCopy.setBaseQualities(new byte[0]);

    // Drop every attribute, then put back only the read group.
    blankCopy.clearAttributes();
    final String sourceReadGroup = read.getReadGroup();
    if (sourceReadGroup != null) {
      blankCopy.setAttribute(SAMTag.RG.name(), sourceReadGroup);
    }

    return blankCopy;
  }
示例#14
0
  /**
   * Computes the reference coordinate at which the read begins when leading soft clips are
   * counted as part of the read, while hard clips are ignored.
   *
   * <p>Note: getUnclippedStart() extends the start by both soft and hard clips; this method
   * extends it by soft clips only.
   *
   * @param read the read (validated with Utils.nonNull)
   * @param cigar the read's cigar (validated with Utils.nonNull). It is taken as an argument for
   *     speed, because getCigar is an expensive operation. Most callers should use the overload
   *     that does not take the cigar.
   * @return the alignment start minus the total length of the soft clips that precede the first
   *     non-clip element
   */
  public static int getSoftStart(final GATKRead read, final Cigar cigar) {
    Utils.nonNull(read, "read");
    Utils.nonNull(cigar, "cigar");

    final int alignmentStart = read.getStart();

    // Sum the soft clips at the front of the cigar, stepping over hard clips and stopping at the
    // first element that is neither kind of clip.
    int leadingSoftClip = 0;
    for (final CigarElement element : cigar.getCigarElements()) {
      final CigarOperator operator = element.getOperator();
      if (operator == CigarOperator.HARD_CLIP) {
        continue;
      }
      if (operator != CigarOperator.SOFT_CLIP) {
        break;
      }
      leadingSoftClip += element.getLength();
    }
    return alignmentStart - leadingSoftClip;
  }
示例#15
0
 /**
  * Stores base deletion qualities on a read under the BQSR_BASE_DELETION_QUALITIES attribute.
  *
  * @param read the read whose attribute is set
  * @param quals the qualities to store, encoded with SAMUtils.phredToFastq before being written;
  *     when null, the attribute is set to a null string value instead
  */
 public static void setDeletionBaseQualities(final GATKRead read, final byte[] quals) {
   String encodedQuals = null;
   if (quals != null) {
     encodedQuals = SAMUtils.phredToFastq(quals);
   }
   read.setAttribute(BQSR_BASE_DELETION_QUALITIES, encodedQuals);
 }
示例#16
0
 /**
  * Resolves the provided read's read group to its {@link SAMReadGroupRecord} in the header.
  *
  * @param read read whose read group to retrieve
  * @param header SAM header containing read groups
  * @return the result of looking the read's read group up in the header, or null if the read has
  *     no read group
  */
 public static SAMReadGroupRecord getSAMReadGroupRecord(
     final GATKRead read, final SAMFileHeader header) {
   final String readGroupId = read.getReadGroup();
   if (readGroupId == null) {
     return null;
   }
   return header.getReadGroup(readGroupId);
 }
示例#17
0
 /**
  * Converts a reference coordinate to a read coordinate, first raising the requested reference
  * coordinate to the read's soft start whenever it lies to the left of it.
  *
  * <p>The conversion itself is delegated to the getReadCoordinateForReferenceCoordinate overload
  * that takes the soft start and cigar; the final {@code false} argument is passed through
  * unchanged (its meaning is defined by that overload, which is not shown here).
  *
  * @param read the read to convert against (validated with Utils.nonNull)
  * @param refCoord the requested reference coordinate
  * @param tail which tail is being clipped; forwarded to the delegate
  * @return the read coordinate reported by the delegate for max(soft start, refCoord)
  */
 public static int getReadCoordinateForReferenceCoordinateUpToEndOfRead(
     final GATKRead read, final int refCoord, final ClippingTail tail) {
   Utils.nonNull(read);
   // Fetch the cigar and derive the soft start a single time each and reuse them, instead of
   // recomputing the soft start twice and fetching the cigar again for the delegate.
   final Cigar cigar = read.getCigar();
   final int softStart = getSoftStart(read, cigar);
   final int leftmostSafeVariantPosition = Math.max(softStart, refCoord);
   return getReadCoordinateForReferenceCoordinate(
       softStart, cigar, leftmostSafeVariantPosition, tail, false);
 }
示例#18
0
 /**
  * Tells whether a read is treated as non-primary here: a secondary alignment, a supplementary
  * alignment, or an unmapped read.
  *
  * @param read the read to check
  * @return true if any of the three conditions holds, false otherwise
  */
 public static boolean isNonPrimary(GATKRead read) {
   if (read.isSecondaryAlignment()) {
     return true;
   }
   if (read.isSupplementaryAlignment()) {
     return true;
   }
   return read.isUnmapped();
 }
示例#19
0
 /**
  * Picks the strand-aware unclipped start of a read: the unclipped start for a forward-strand
  * read, the unclipped end for a reverse-strand read.
  *
  * @param read read whose stranded unclipped start to retrieve
  * @return read.getUnclippedEnd() if the read is on the reverse strand, otherwise
  *     read.getUnclippedStart()
  */
 public static int getStrandedUnclippedStart(final GATKRead read) {
   if (read.isReverseStrand()) {
     return read.getUnclippedEnd();
   } else {
     return read.getUnclippedStart();
   }
 }
示例#20
0
 /**
  * Computes the reference coordinate at which the read begins when leading soft clips are counted
  * as part of the read, while hard clips are ignored. The read's own cigar is fetched and handed
  * to the overload that takes a cigar.
  *
  * <p>Note: getUnclippedStart() extends the start by both soft and hard clips; this method
  * extends it by soft clips only.
  *
  * @param read the read (validated with Utils.nonNull)
  * @return the unclipped start of the read taking soft clips (but not hard clips) into account
  */
 public static int getSoftStart(final GATKRead read) {
   Utils.nonNull(read);
   final Cigar readCigar = read.getCigar();
   return getSoftStart(read, readCigar);
 }
示例#21
0
 /**
  * Tells whether a read is paired and its mate is mapped. The mate's mapping state is only
  * queried for paired reads.
  *
  * @param read read to check
  * @return true if the read is paired and has a mapped mate, otherwise false
  */
 public static boolean readHasMappedMate(final GATKRead read) {
   if (!read.isPaired()) {
     return false;
   }
   return !read.mateIsUnmapped();
 }
示例#22
0
  /**
   * Can the adaptor sequence of read be reliably removed from the read based on the alignment of
   * read and its mate?
   *
   * <p>The answer is no when the fragment length is 0, the read is unpaired, the read or its mate
   * is unmapped, or both mates lie on the same strand. Otherwise it depends on the relative
   * positions of the read and its mate.
   *
   * @param read the read to check
   * @return true if it can, false otherwise
   */
  public static boolean hasWellDefinedFragmentSize(final GATKRead read) {
    // Disqualifying conditions, evaluated left to right with short-circuiting:
    //  - fragment length 0: no adaptors in reads with mates in another chromosome or unmapped
    //    pairs
    //  - unpaired: only reads that are paired can be adaptor trimmed
    //  - read or mate unmapped: both must be mapped to trim
    //  - same strand: sanity check that read1 and read2 aren't on the same strand
    // The isProperlyPaired flag is deliberately not consulted: it isn't always set properly in
    // BAMs, and relying on it would stop us from handling some proper pairs.
    final boolean disqualified =
        read.getFragmentLength() == 0
            || !read.isPaired()
            || read.isUnmapped()
            || read.mateIsUnmapped()
            || read.isReverseStrand() == read.mateIsReverseStrand();
    if (disqualified) {
      return false;
    }

    // Negative strand: the read runs right to left, so its end must lie past the mate's start.
    // Positive strand: the mate should be to the right (mate start + insert size should be at or
    // past the read's start).
    return read.isReverseStrand()
        ? read.getEnd() > read.getMateStart()
        : read.getStart() <= read.getMateStart() + read.getFragmentLength();
  }
示例#23
0
 /**
  * Tells whether a read carries base insertion or base deletion qualities.
  *
  * @param read the read to check
  * @return true if the read has the BQSR_BASE_INSERTION_QUALITIES attribute or the
  *     BQSR_BASE_DELETION_QUALITIES attribute (one of the two is sufficient)
  */
 public static boolean hasBaseIndelQualities(final GATKRead read) {
   if (read.hasAttribute(BQSR_BASE_INSERTION_QUALITIES)) {
     return true;
   }
   return read.hasAttribute(BQSR_BASE_DELETION_QUALITIES);
 }
示例#24
0
 /**
  * Reads the base deletion qualities stored on a read under BQSR_BASE_DELETION_QUALITIES and
  * decodes them with SAMUtils.fastqToPhred.
  *
  * @param read the read to query
  * @return the base deletion quality or null if read doesn't have one (this presumably relies on
  *     SAMUtils.fastqToPhred returning null for a null attribute string; confirm against htsjdk)
  */
 public static byte[] getExistingBaseDeletionQualities(final GATKRead read) {
   final String encodedQuals = read.getAttributeAsString(BQSR_BASE_DELETION_QUALITIES);
   return SAMUtils.fastqToPhred(encodedQuals);
 }
示例#25
0
 /**
  * Converts a reference coordinate to a read coordinate for clipping purposes, by delegating to
  * the getReadCoordinateForReferenceCoordinate overload that takes the read's soft start and
  * cigar.
  *
  * <p>According to the original documentation, two corner cases are taken care of (the handling
  * itself lives in the delegate, which is not shown here):
  *
  * <p>1. If clipping the right tail (end of the read) and the coordinate falls inside a deletion,
  * the base after the deletion is returned. If clipping the left tail (beginning of the read) it
  * doesn't matter because the previous base is already returned by default.
  *
  * <p>2. If clipping the left tail (beginning of the read), the read starts with an insertion,
  * and the first read based coordinate is requested, the leading insertion is skipped (because it
  * has the same reference coordinate as the following base).
  *
  * @param read the read to convert against (validated with Utils.nonNull)
  * @param refCoord the requested reference coordinate
  * @param tail which tail is being clipped; forwarded to the delegate
  * @return the read coordinate corresponding to the requested reference coordinate for clipping.
  */
 public static int getReadCoordinateForReferenceCoordinate(
     final GATKRead read, final int refCoord, final ClippingTail tail) {
   Utils.nonNull(read);
   // Fetch the cigar a single time and use it both for the soft start and for the delegate.
   final Cigar cigar = read.getCigar();
   return getReadCoordinateForReferenceCoordinate(
       getSoftStart(read, cigar), cigar, refCoord, tail, false);
 }
示例#26
0
 /**
  * Tells whether a reference coordinate falls within a read's aligned span, with both ends
  * inclusive.
  *
  * @param read the read to evaluate
  * @param referenceCoordinate the reference coordinate of the base to test
  * @return true if read.getStart() is at most the coordinate and the coordinate is at most
  *     read.getEnd(), false otherwise.
  */
 public static boolean isInsideRead(final GATKRead read, final int referenceCoordinate) {
   if (referenceCoordinate < read.getStart()) {
     return false;
   }
   return referenceCoordinate <= read.getEnd();
 }
示例#27
0
 /**
  * Produces the reverse complement of a read's bases by handing read.getBases() to the overload
  * that works on a base array.
  *
  * @param read the read
  * @return the reverse complement of the read bases
  */
 public static String getBasesReverseComplement(final GATKRead read) {
   final byte[] readBases = read.getBases();
   return getBasesReverseComplement(readBases);
 }
示例#28
0
 /**
  * Computes the reference coordinate at which the read ends when trailing soft clips are counted
  * as part of the read, while hard clips are ignored. The read's own cigar is fetched and handed
  * to the overload that takes a cigar.
  *
  * <p>Note: getUnclippedEnd() extends the end by both soft and hard clips; this method extends it
  * by soft clips only.
  *
  * @param read the read
  * @return the unclipped end of the read taking soft clips (but not hard clips) into account
  */
 public static int getSoftEnd(final GATKRead read) {
   final Cigar readCigar = read.getCigar();
   return getSoftEnd(read, readCigar);
 }
示例#29
0
  /**
   * Builds the set of SAM bitwise flags that describes a GATKRead.
   *
   * <p>Each property of the read contributes its flag constant when it holds. Mate properties are
   * only queried when the read is paired, and the read's own strand only when it is mapped.
   *
   * @param read read from which to construct the flags
   * @return SAM-compliant set of bitwise flags reflecting the properties in the given read
   */
  public static int getSAMFlagsForRead(final GATKRead read) {
    int flags = 0;

    flags |= read.isPaired() ? SAM_READ_PAIRED_FLAG : 0;
    flags |= read.isProperlyPaired() ? SAM_PROPER_PAIR_FLAG : 0;
    flags |= read.isUnmapped() ? SAM_READ_UNMAPPED_FLAG : 0;
    flags |= (read.isPaired() && read.mateIsUnmapped()) ? SAM_MATE_UNMAPPED_FLAG : 0;
    flags |= (!read.isUnmapped() && read.isReverseStrand()) ? SAM_READ_STRAND_FLAG : 0;
    flags |=
        (read.isPaired() && !read.mateIsUnmapped() && read.mateIsReverseStrand())
            ? SAM_MATE_STRAND_FLAG
            : 0;
    flags |= read.isFirstOfPair() ? SAM_FIRST_OF_PAIR_FLAG : 0;
    flags |= read.isSecondOfPair() ? SAM_SECOND_OF_PAIR_FLAG : 0;
    flags |= read.isSecondaryAlignment() ? SAM_NOT_PRIMARY_ALIGNMENT_FLAG : 0;
    flags |= read.failsVendorQualityCheck() ? SAM_READ_FAILS_VENDOR_QUALITY_CHECK_FLAG : 0;
    flags |= read.isDuplicate() ? SAM_DUPLICATE_READ_FLAG : 0;
    flags |= read.isSupplementaryAlignment() ? SAM_SUPPLEMENTARY_ALIGNMENT_FLAG : 0;

    return flags;
  }