/** * Returns the coverage distribution of a single read within the desired region. * * <p>Note: This function counts DELETIONS as coverage (since the main purpose is to downsample * reads for variant regions, and deletions count as variants) * * @param read the read to get the coverage distribution of * @param startLocation the first reference coordinate of the region (inclusive) * @param stopLocation the last reference coordinate of the region (inclusive) * @return an array with the coverage of each position from startLocation to stopLocation */ public static int[] getCoverageDistributionOfRead( GATKSAMRecord read, int startLocation, int stopLocation) { int[] coverage = new int[stopLocation - startLocation + 1]; int refLocation = read.getSoftStart(); for (CigarElement cigarElement : read.getCigar().getCigarElements()) { switch (cigarElement.getOperator()) { case S: case M: case EQ: case N: case X: case D: for (int i = 0; i < cigarElement.getLength(); i++) { if (refLocation >= startLocation && refLocation <= stopLocation) { int baseCount = read.isReducedRead() ? read.getReducedCount(refLocation - read.getSoftStart()) : 1; coverage[refLocation - startLocation] += baseCount; // this may be a reduced read, so add the proper number of bases } refLocation++; } break; case P: case I: case H: break; } if (refLocation > stopLocation) break; } return coverage; }
/**
 * Maps a reference coordinate to the matching coordinate within the read.
 *
 * <p>This is a convenience overload: it delegates to the (soft start, cigar, refCoord, flag)
 * overload using the read's own soft start and cigar, with the trailing flag set to {@code
 * false}.
 *
 * <p>WARNING: when the requested reference coordinate lies inside a deletion in the read, the
 * returned read coordinate is that of the last read base before the deletion. The result is a
 * Pair(int readCoord, boolean fallsInsideDeletion), so callers can decide which read coordinate
 * to use when a deletion is involved.
 *
 * <p>SUGGESTION: prefer getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail)
 * when you want a result already adjusted for normal clipping needs; use this method when you
 * need to tailor the deletion handling yourself.
 *
 * @param read the read whose soft start and cigar drive the mapping
 * @param refCoord the reference coordinate to translate; per the contract it must lie within
 *     {@code [read.getSoftStart(), read.getSoftEnd()]}
 * @return the read coordinate corresponding to the requested reference coordinate, paired with
 *     the falls-inside-deletion flag (see warning!)
 */
@Requires({"refCoord >= read.getSoftStart()", "refCoord <= read.getSoftEnd()"})
@Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(
    GATKSAMRecord read, int refCoord) {
  final int alignmentSoftStart = read.getSoftStart();
  return getReadCoordinateForReferenceCoordinate(
      alignmentSoftStart, read.getCigar(), refCoord, false);
}
/**
 * Clipping-oriented variant of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) that
 * post-processes the raw result to handle two corner cases:
 *
 * <p>1. When clipping the right tail (end of the read) and the reference coordinate falls inside
 * a deletion, the base after the deletion is returned. When clipping the left tail (beginning of
 * the read) nothing changes, because the previous base is already what is returned by default.
 *
 * <p>2. When clipping the left tail (beginning of the read), the read starts with an insertion,
 * and the first read-based coordinate is requested, the leading insertion is skipped (because it
 * shares its reference coordinate with the following base).
 *
 * <p>This overload delegates to the (soft start, cigar, refCoord, tail, flag) overload using the
 * read's own soft start and cigar, with the trailing flag set to {@code false}.
 *
 * @param read the read whose soft start and cigar drive the mapping
 * @param refCoord the reference coordinate to translate
 * @param tail which tail of the read is being clipped
 * @return the read coordinate corresponding to the requested reference coordinate for clipping.
 */
@Requires({
  "refCoord >= read.getUnclippedStart()",
  "refCoord <= read.getUnclippedEnd() || (read.getUnclippedEnd() < read.getUnclippedStart())"
})
@Ensures({"result >= 0", "result < read.getReadLength()"})
public static int getReadCoordinateForReferenceCoordinate(
    GATKSAMRecord read, int refCoord, ClippingTail tail) {
  final int alignmentSoftStart = read.getSoftStart();
  return getReadCoordinateForReferenceCoordinate(
      alignmentSoftStart, read.getCigar(), refCoord, tail, false);
}
/**
 * Classifies where a read sits relative to an interval.
 *
 * <p>The checks run in this order, and the first one that matches wins:
 *
 * <ol>
 *   <li>different contig: NO_OVERLAP_CONTIG
 *   <li>UNCLIPPED end before the interval start: NO_OVERLAP_LEFT
 *   <li>UNCLIPPED start after the interval stop: NO_OVERLAP_RIGHT
 *   <li>soft end before the interval start: NO_OVERLAP_HARDCLIPPED_LEFT
 *   <li>soft start after the interval stop: NO_OVERLAP_HARDCLIPPED_RIGHT
 *   <li>soft start and soft end both within the interval: OVERLAP_CONTAINED
 *   <li>soft span sticks out on both sides: OVERLAP_LEFT_AND_RIGHT
 *   <li>soft span sticks out only before the interval start: OVERLAP_LEFT
 *   <li>otherwise: OVERLAP_RIGHT
 * </ol>
 *
 * @param read the read
 * @param interval the interval
 * @return the overlap type as described by the ReadAndIntervalOverlap enum
 */
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(
    GATKSAMRecord read, GenomeLoc interval) {
  final int softStart = read.getSoftStart();
  final int softEnd = read.getSoftEnd();
  final int unclippedStart = read.getUnclippedStart();
  final int unclippedEnd = read.getUnclippedEnd();

  if (!read.getReferenceName().equals(interval.getContig())) {
    return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
  }

  // Even the unclipped extent of the read misses the interval.
  if (unclippedEnd < interval.getStart()) {
    return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;
  }
  if (unclippedStart > interval.getStop()) {
    return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;
  }

  // The unclipped extent reaches the interval, but the soft extent does not.
  if (softEnd < interval.getStart()) {
    return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT;
  }
  if (softStart > interval.getStop()) {
    return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT;
  }

  // From here on the soft extent genuinely overlaps the interval.
  final boolean startsBeforeInterval = softStart < interval.getStart();
  final boolean endsAfterInterval = softEnd > interval.getStop();

  if (!startsBeforeInterval && !endsAfterInterval) {
    return ReadAndIntervalOverlap.OVERLAP_CONTAINED;
  }
  if (startsBeforeInterval && endsAfterInterval) {
    return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;
  }
  return startsBeforeInterval
      ? ReadAndIntervalOverlap.OVERLAP_LEFT
      : ReadAndIntervalOverlap.OVERLAP_RIGHT;
}