/** * Calculates the reference coordinate for the end of the read taking into account soft clips but * not hard clips. * * <p>Note: getUnclippedEnd() adds soft and hard clips, this function only adds soft clips. * * @param read the read * @param cigar the read's cigar * <p>Note: this overload of the function takes the cigar as input for speed because getCigar * is an expensive operation. Most callers should use the overload that does not take the * cigar. * @return the unclipped end of the read taking soft clips (but not hard clips) into account */ public static int getSoftEnd(final GATKRead read, final Cigar cigar) { Utils.nonNull(read, "read"); Utils.nonNull(cigar, "cigar"); boolean foundAlignedBase = false; int softEnd = read.getEnd(); final List<CigarElement> cigs = cigar.getCigarElements(); for (int i = cigs.size() - 1; i >= 0; --i) { final CigarElement cig = cigs.get(i); final CigarOperator op = cig.getOperator(); if (op == CigarOperator .SOFT_CLIP) { // assumes the soft clip that we found is at the end of the aligned read softEnd += cig.getLength(); } else if (op != CigarOperator.HARD_CLIP) { foundAlignedBase = true; break; } } if (!foundAlignedBase) { // for example 64H14S, the soft end is actually the same as the // alignment end softEnd = read.getEnd(); } return softEnd; }
/**
 * Looks up the original base qualities of a read as stored in its OQ attribute.
 *
 * @param read read to check
 * @return the decoded original base qualities, or null if the OQ attribute is absent or holds an
 *     empty string
 */
public static byte[] getOriginalBaseQualities(final GATKRead read) {
  if (read.hasAttribute(ORIGINAL_BASE_QUALITIES_TAG)) {
    final String encodedQuals = read.getAttributeAsString(ORIGINAL_BASE_QUALITIES_TAG);
    if (!encodedQuals.isEmpty()) {
      return SAMUtils.fastqToPhred(encodedQuals);
    }
  }
  return null;
}
/**
 * Resolves the index, within the given header, of the contig the read's mate is aligned to.
 *
 * @param read read whose mate's reference index to look up
 * @param header SAM header defining contig indices
 * @return the header index of the mate's contig, or {@link
 *     SAMRecord#NO_ALIGNMENT_REFERENCE_INDEX} if the read's mate is unmapped
 */
public static int getMateReferenceIndex(final GATKRead read, final SAMFileHeader header) {
  return read.mateIsUnmapped()
      ? SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX
      : header.getSequenceIndex(read.getMateContig());
}
/**
 * Renders the read's base qualities as a FASTQ-encoded string.
 *
 * @param read read whose base qualities should be returned
 * @return the base qualities as printable ASCII values (encoded as a FASTQ string), or {@link
 *     SAMRecord#NULL_QUALS_STRING} when the read's qualities equal {@link SAMRecord#NULL_QUALS}
 */
public static String getBaseQualityString(final GATKRead read) {
  Utils.nonNull(read);
  final boolean qualitiesMissing = Arrays.equals(SAMRecord.NULL_QUALS, read.getBaseQualities());
  return qualitiesMissing
      ? SAMRecord.NULL_QUALS_STRING
      : SAMUtils.phredToFastq(read.getBaseQualities());
}
/**
 * If a read ends in INSERTION, returns the last element length.
 *
 * <p>Warning: If the read has Hard or Soft clips after the insertion this function will return 0.
 *
 * <p>A read whose cigar has no elements has no trailing insertion, so 0 is returned for it as
 * well.
 *
 * @param read the read whose last cigar element is inspected
 * @return the length of the last insertion, or 0 if the cigar is empty or does not end in an
 *     insertion (see warning).
 */
public static int getLastInsertionOffset(final GATKRead read) {
  // Fetch the cigar a single time; getCigar is an expensive operation.
  final Cigar cigar = read.getCigar();
  final int elementCount = cigar.numCigarElements();
  if (elementCount == 0) {
    // Nothing to inspect: asking for element -1 would throw instead of reporting "none".
    return 0;
  }
  final CigarElement lastElement = cigar.getCigarElement(elementCount - 1);
  return lastElement.getOperator() == CigarOperator.I ? lastElement.getLength() : 0;
}
/**
 * Replaces the read's base qualities with the values decoded from a FASTQ-encoded string.
 *
 * <p>The string {@link SAMRecord#NULL_QUALS_STRING} is mapped to {@link SAMRecord#NULL_QUALS}
 * instead of being decoded.
 *
 * @param read read whose base qualities should be set
 * @param baseQualityString ASCII encoded (encoded as a FASTQ string) values of base qualities.
 */
public static void setBaseQualityString(final GATKRead read, final String baseQualityString) {
  Utils.nonNull(read);
  Utils.nonNull(baseQualityString);

  final byte[] decodedQuals =
      SAMRecord.NULL_QUALS_STRING.equals(baseQualityString)
          ? SAMRecord.NULL_QUALS
          : SAMUtils.fastqToPhred(baseQualityString);
  read.setBaseQualities(decodedQuals);
}
/** * Check to ensure that the alignment makes sense based on the contents of the header. * * @param header The SAM file header. * @param read The read to verify. * @return true if alignment agrees with header, false otherwise. */ public static boolean alignmentAgreesWithHeader(final SAMFileHeader header, final GATKRead read) { final int referenceIndex = getReferenceIndex(read, header); // Read is aligned to nonexistent contig if (!read.isUnmapped() && referenceIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { return false; } final SAMSequenceRecord contigHeader = header.getSequence(referenceIndex); // Read is aligned to a point after the end of the contig return read.isUnmapped() || read.getStart() <= contigHeader.getSequenceLength(); }
/**
 * Finds the length of the longest read in the given list.
 *
 * @param reads list of reads
 * @return the largest read length, never less than 0 (0 for an empty list)
 * @throws IllegalArgumentException if {@code reads} is null
 */
public static int getMaxReadLength(final List<GATKRead> reads) {
  if (reads == null) {
    throw new IllegalArgumentException("Attempting to check a null list of reads.");
  }
  // Fold with 0 as the starting value so the result matches a running Math.max from zero.
  return reads.stream().mapToInt(GATKRead::getLength).reduce(0, Math::max);
}
/** * Finds the adaptor boundary around the read and returns the first base inside the adaptor that * is closest to the read boundary. If the read is in the positive strand, this is the first base * after the end of the fragment (Picard calls it 'insert'), if the read is in the negative * strand, this is the first base before the beginning of the fragment. * * <p>There are two cases we need to treat here: * * <p>1) Our read is in the reverse strand : * * <p><----------------------| * |---------------------> * * <p>in these cases, the adaptor boundary is at the mate start (minus one) * * <p>2) Our read is in the forward strand : * * <p>|----------------------> * <----------------------| * * <p>in these cases the adaptor boundary is at the start of the read plus the inferred insert * size (plus one) * * @param read the read being tested for the adaptor boundary * @return the reference coordinate for the adaptor boundary (effectively the first base IN the * adaptor, closest to the read. CANNOT_COMPUTE_ADAPTOR_BOUNDARY if the read is unmapped or * the mate is mapped to another contig. */ public static int getAdaptorBoundary(final GATKRead read) { if (!hasWellDefinedFragmentSize(read)) { return CANNOT_COMPUTE_ADAPTOR_BOUNDARY; } else if (read.isReverseStrand()) { return read.getMateStart() - 1; // case 1 (see header) } else { final int insertSize = Math.abs( read .getFragmentLength()); // the inferred insert size can be negative if the mate is // mapped before the read (so we take the absolute value) return read.getStart() + insertSize + 1; // case 2 (see header) } }
/**
 * Resets the quality scores of the read to the original (pre-BQSR) ones.
 *
 * <p>When {@link #getOriginalBaseQualities(GATKRead)} yields null the read is left untouched.
 *
 * @param read the read to modify in place
 * @return the same read instance that was passed in
 */
public static GATKRead resetOriginalBaseQualities(final GATKRead read) {
  final byte[] preRecalibrationQuals = ReadUtils.getOriginalBaseQualities(read);
  if (preRecalibrationQuals == null) {
    return read;
  }
  read.setBaseQualities(preRecalibrationQuals);
  return read;
}
/**
 * Selects the quality array of the read that corresponds to the given error model.
 *
 * @param read the read to query
 * @param errorModel which kind of qualities to return: substitution, insertion or deletion
 * @return the read's base qualities for BASE_SUBSTITUTION, its base insertion qualities for
 *     BASE_INSERTION, or its base deletion qualities for BASE_DELETION
 * @throws GATKException if {@code errorModel} is none of the handled values
 */
public static byte[] getBaseQualities(final GATKRead read, final EventType errorModel) {
  final byte[] selectedQuals;
  switch (errorModel) {
    case BASE_SUBSTITUTION:
      selectedQuals = read.getBaseQualities();
      break;
    case BASE_INSERTION:
      selectedQuals = getBaseInsertionQualities(read);
      break;
    case BASE_DELETION:
      selectedQuals = getBaseDeletionQualities(read);
      break;
    default:
      throw new GATKException("Unrecognized Base Recalibration type: " + errorModel);
  }
  return selectedQuals;
}
/** * Default utility to query the base deletion quality of a read. If the read doesn't have one, it * creates an array of default qualities (currently Q45) and assigns it to the read. * * @return the base deletion quality array */ public static byte[] getBaseDeletionQualities(final GATKRead read) { byte[] quals = getExistingBaseDeletionQualities(read); if (quals == null) { quals = new byte[read.getBaseQualities().length]; Arrays.fill( quals, DEFAULT_INSERTION_DELETION_QUAL); // Some day in the future when base insertion and base // deletion quals exist the samtools API will // be updated and the original quals will be pulled here, but for now we assume the original // quality is a flat Q45 } return quals; }
/**
 * Builds an "empty" read from a copy of the provided read: the copy's cigar is set to the empty
 * string, its bases and base qualities to zero-length arrays, and all of its attributes are
 * cleared except that the source read's read group (if any) is re-attached under the RG tag.
 *
 * <p>Fields that are not reset here are whatever {@code read.copy()} carried over (the previous
 * documentation mentions mate information).
 *
 * <p>Use this method if you want to create a new empty read based on another read
 *
 * @param read a read to copy fields from
 * @return a read with no bases but safe for the GATK
 */
public static GATKRead emptyRead(final GATKRead read) {
  final GATKRead stripped = read.copy();
  stripped.setCigar("");
  stripped.setBases(new byte[0]);
  stripped.setBaseQualities(new byte[0]);

  // Drop every attribute, then restore only the read group.
  stripped.clearAttributes();
  final String sourceReadGroup = read.getReadGroup();
  if (sourceReadGroup == null) {
    return stripped;
  }
  stripped.setAttribute(SAMTag.RG.name(), sourceReadGroup);
  return stripped;
}
/**
 * Computes the reference coordinate at which the read begins once leading soft clips (but not
 * hard clips) are counted as part of the read.
 *
 * <p>Note: getUnclippedStart() adds soft and hard clips, this function only adds soft clips.
 *
 * <p>The cigar is scanned from its first element: soft clip lengths are subtracted from the
 * alignment start, hard clips are skipped, and the scan stops at the first element that is
 * neither.
 *
 * @param read the read
 * @param cigar the read's cigar
 *     <p>Note: this overload takes the cigar as input for speed because getCigar is an expensive
 *     operation. Most callers should use the overload that does not take the cigar.
 * @return the start of the read taking soft clips (but not hard clips) into account
 */
public static int getSoftStart(final GATKRead read, final Cigar cigar) {
  Utils.nonNull(read, "read");
  Utils.nonNull(cigar, "cigar");

  final int alignmentStart = read.getStart();
  final List<CigarElement> elements = cigar.getCigarElements();

  int leadingSoftClippedBases = 0;
  for (int i = 0; i < elements.size(); i++) {
    final CigarElement element = elements.get(i);
    final CigarOperator operator = element.getOperator();
    if (operator == CigarOperator.SOFT_CLIP) {
      leadingSoftClippedBases += element.getLength();
    } else if (operator != CigarOperator.HARD_CLIP) {
      break;
    }
  }
  return alignmentStart - leadingSoftClippedBases;
}
/**
 * Stores base deletion qualities on the read under the BQSR_BASE_DELETION_QUALITIES attribute,
 * encoded as a FASTQ string.
 *
 * @param read the read to modify
 * @param quals the qualities to store; when null, a null attribute value is passed to the read
 *     (presumably clearing the attribute - confirm against GATKRead.setAttribute)
 */
public static void setDeletionBaseQualities(final GATKRead read, final byte[] quals) {
  final String encodedQuals;
  if (quals == null) {
    encodedQuals = null;
  } else {
    encodedQuals = SAMUtils.phredToFastq(quals);
  }
  read.setAttribute(BQSR_BASE_DELETION_QUALITIES, encodedQuals);
}
/**
 * Looks up, in the given header, the {@link SAMReadGroupRecord} for the provided read's read
 * group.
 *
 * @param read read whose read group to retrieve
 * @param header SAM header containing read groups
 * @return the result of the header lookup for the read's read group name, or null if the read
 *     has no read group
 */
public static SAMReadGroupRecord getSAMReadGroupRecord(
    final GATKRead read, final SAMFileHeader header) {
  final String groupId = read.getReadGroup();
  if (groupId == null) {
    return null;
  }
  return header.getReadGroup(groupId);
}
/**
 * Translates a reference coordinate into a read coordinate, first clamping the requested
 * reference coordinate so that it is never to the left of the read's soft start.
 *
 * <p>The translation itself is delegated to the cigar-based overload of {@code
 * getReadCoordinateForReferenceCoordinate}, called with the read's soft start, its cigar, the
 * clamped coordinate, the given tail and {@code false} as the final flag.
 *
 * @param read the read to translate against
 * @param refCoord the requested reference coordinate
 * @param tail which tail of the read is being clipped
 * @return the read coordinate reported by the delegate for the clamped reference coordinate
 */
public static int getReadCoordinateForReferenceCoordinateUpToEndOfRead(
    final GATKRead read, final int refCoord, final ClippingTail tail) {
  // Validate before touching the read, as getSoftStart(GATKRead) does.
  Utils.nonNull(read);

  // getCigar is an expensive operation: fetch the cigar once and derive the soft start from it
  // instead of recomputing both for every use.
  final Cigar cigar = read.getCigar();
  final int softStart = getSoftStart(read, cigar);

  final int leftmostSafeVariantPosition = Math.max(softStart, refCoord);
  return getReadCoordinateForReferenceCoordinate(
      softStart, cigar, leftmostSafeVariantPosition, tail, false);
}
/**
 * Reports whether the read is flagged as a secondary alignment, a supplementary alignment, or
 * unmapped.
 *
 * @param read the read to check
 * @return true if any of the three conditions holds, false otherwise
 */
public static boolean isNonPrimary(GATKRead read) {
  if (read.isSecondaryAlignment()) {
    return true;
  }
  if (read.isSupplementaryAlignment()) {
    return true;
  }
  return read.isUnmapped();
}
/**
 * Picks the strand-aware unclipped start of a read: the unclipped start for forward-strand reads
 * and the unclipped end for reverse-strand reads.
 *
 * @param read read whose stranded unclipped start to retrieve
 * @return the read's unclipped start if the read is on the forward strand, or the read's
 *     unclipped end if the read is on the reverse strand.
 */
public static int getStrandedUnclippedStart(final GATKRead read) {
  if (read.isReverseStrand()) {
    return read.getUnclippedEnd();
  } else {
    return read.getUnclippedStart();
  }
}
/**
 * Computes the reference coordinate for the beginning of the read, counting leading soft clips
 * but not hard clips, using the read's own cigar.
 *
 * <p>Note: getUnclippedStart() adds soft and hard clips, this function only adds soft clips.
 *
 * @param read the read
 * @return the unclipped start of the read taking soft clips (but not hard clips) into account
 */
public static int getSoftStart(final GATKRead read) {
  Utils.nonNull(read);
  final Cigar readCigar = read.getCigar();
  return getSoftStart(read, readCigar);
}
/**
 * Tells whether the read has a mate that is mapped. The mate's mapping state is only consulted
 * for paired reads.
 *
 * @param read read to check
 * @return true if the read is paired and has a mapped mate, otherwise false
 */
public static boolean readHasMappedMate(final GATKRead read) {
  if (!read.isPaired()) {
    return false;
  }
  return !read.mateIsUnmapped();
}
/** * Can the adaptor sequence of read be reliably removed from the read based on the alignment of * read and its mate? * * @param read the read to check * @return true if it can, false otherwise */ public static boolean hasWellDefinedFragmentSize(final GATKRead read) { if (read.getFragmentLength() == 0) // no adaptors in reads with mates in another chromosome or unmapped pairs { return false; } if (!read.isPaired()) // only reads that are paired can be adaptor trimmed { return false; } if (read.isUnmapped() || read.mateIsUnmapped()) // only reads when both reads are mapped can be trimmed { return false; } // if ( ! read.isProperlyPaired() ) // // note this flag isn't always set properly in BAMs, can will stop us from // eliminating some proper pairs // // reads that aren't part of a proper pair (i.e., have strange alignments) can't // be trimmed // return false; if (read.isReverseStrand() == read.mateIsReverseStrand()) // sanity check on isProperlyPaired to ensure that read1 and read2 aren't on the same strand { return false; } if (read.isReverseStrand()) { // we're on the negative strand, so our read runs right to left return read.getEnd() > read.getMateStart(); } else { // we're on the positive strand, so our mate should be to our right (his start + insert size // should be past our start) return read.getStart() <= read.getMateStart() + read.getFragmentLength(); } }
/**
 * Checks for the presence of base insertion or base deletion quality attributes on the read.
 *
 * @param read the read to check
 * @return whether or not this read has base insertion or deletion qualities (one of the two is
 *     sufficient to return true)
 */
public static boolean hasBaseIndelQualities(final GATKRead read) {
  if (read.hasAttribute(BQSR_BASE_INSERTION_QUALITIES)) {
    return true;
  }
  return read.hasAttribute(BQSR_BASE_DELETION_QUALITIES);
}
/**
 * Decodes the base deletion qualities stored on the read under the BQSR_BASE_DELETION_QUALITIES
 * attribute.
 *
 * <p>The attribute string is handed to {@code SAMUtils.fastqToPhred} as is; the null result for a
 * missing attribute presumably comes from that helper mapping null to null - verify against
 * htsjdk.
 *
 * @param read the read to query
 * @return the base deletion quality or null if read doesn't have one
 */
public static byte[] getExistingBaseDeletionQualities(final GATKRead read) {
  final String encodedQuals = read.getAttributeAsString(BQSR_BASE_DELETION_QUALITIES);
  return SAMUtils.fastqToPhred(encodedQuals);
}
/**
 * Translates a reference coordinate into a read coordinate for clipping purposes by delegating
 * to the cigar-based overload of {@code getReadCoordinateForReferenceCoordinate}, passing the
 * read's soft start, its cigar, the requested coordinate, the clipping tail and {@code false} as
 * the final flag.
 *
 * <p>According to the existing documentation of this method, two corner cases are taken care of:
 *
 * <p>1. When clipping the right tail (end of the read) and the coordinate falls inside a
 * deletion, the base after the deletion is returned. When clipping the left tail (beginning of
 * the read) it doesn't matter because the previous base is already returned by default.
 *
 * <p>2. When clipping the left tail (beginning of the read), the read starts with an insertion,
 * and the first read based coordinate is requested, the leading insertion is skipped (because it
 * has the same reference coordinate as the following base).
 *
 * @param read the read to translate against
 * @param refCoord the requested reference coordinate
 * @param tail which tail of the read is being clipped
 * @return the read coordinate corresponding to the requested reference coordinate for clipping.
 */
public static int getReadCoordinateForReferenceCoordinate(
    final GATKRead read, final int refCoord, final ClippingTail tail) {
  final int softStart = getSoftStart(read);
  final Cigar readCigar = read.getCigar();
  return getReadCoordinateForReferenceCoordinate(softStart, readCigar, refCoord, tail, false);
}
/**
 * Is a base inside a read? The check is inclusive on both the read's start and its end.
 *
 * @param read the read to evaluate
 * @param referenceCoordinate the reference coordinate of the base to test
 * @return true if it is inside the read, false otherwise.
 */
public static boolean isInsideRead(final GATKRead read, final int referenceCoordinate) {
  if (referenceCoordinate < read.getStart()) {
    return false;
  }
  return referenceCoordinate <= read.getEnd();
}
/**
 * Produces the reverse complement of the read's bases by delegating to the byte-array overload.
 *
 * @param read the read
 * @return the reverse complement of the read bases
 */
public static String getBasesReverseComplement(final GATKRead read) {
  final byte[] readBases = read.getBases();
  return getBasesReverseComplement(readBases);
}
/**
 * Calculates the reference coordinate for the end of the read taking into account soft clips but
 * not hard clips, using the read's own cigar.
 *
 * <p>Note: getUnclippedEnd() adds soft and hard clips, this function only adds soft clips.
 *
 * @param read the read; must not be null
 * @return the unclipped end of the read taking soft clips (but not hard clips) into account
 */
public static int getSoftEnd(final GATKRead read) {
  // Validate up front, mirroring getSoftStart(GATKRead), so a null read is rejected by the
  // argument check rather than by dereferencing it.
  Utils.nonNull(read);
  return getSoftEnd(read, read.getCigar());
}
/**
 * Assembles the SAM bitwise flags that describe the given read.
 *
 * <p>Mate-related flags are only derived for paired reads, and the read strand flag is only set
 * for mapped reads.
 *
 * @param read read from which to construct the flags
 * @return SAM-compliant set of bitwise flags reflecting the properties in the given read
 */
public static int getSAMFlagsForRead(final GATKRead read) {
  int flags = 0;

  flags |= read.isPaired() ? SAM_READ_PAIRED_FLAG : 0;
  flags |= read.isProperlyPaired() ? SAM_PROPER_PAIR_FLAG : 0;
  flags |= read.isUnmapped() ? SAM_READ_UNMAPPED_FLAG : 0;
  flags |= (read.isPaired() && read.mateIsUnmapped()) ? SAM_MATE_UNMAPPED_FLAG : 0;
  flags |= (!read.isUnmapped() && read.isReverseStrand()) ? SAM_READ_STRAND_FLAG : 0;
  flags |=
      (read.isPaired() && !read.mateIsUnmapped() && read.mateIsReverseStrand())
          ? SAM_MATE_STRAND_FLAG
          : 0;
  flags |= read.isFirstOfPair() ? SAM_FIRST_OF_PAIR_FLAG : 0;
  flags |= read.isSecondOfPair() ? SAM_SECOND_OF_PAIR_FLAG : 0;
  flags |= read.isSecondaryAlignment() ? SAM_NOT_PRIMARY_ALIGNMENT_FLAG : 0;
  flags |= read.failsVendorQualityCheck() ? SAM_READ_FAILS_VENDOR_QUALITY_CHECK_FLAG : 0;
  flags |= read.isDuplicate() ? SAM_DUPLICATE_READ_FLAG : 0;
  flags |= read.isSupplementaryAlignment() ? SAM_SUPPLEMENTARY_ALIGNMENT_FLAG : 0;

  return flags;
}