/** * Returns the coverage distribution of a single read within the desired region. * * <p>Note: This function counts DELETIONS as coverage (since the main purpose is to downsample * reads for variant regions, and deletions count as variants) * * @param read the read to get the coverage distribution of * @param startLocation the first reference coordinate of the region (inclusive) * @param stopLocation the last reference coordinate of the region (inclusive) * @return an array with the coverage of each position from startLocation to stopLocation */ public static int[] getCoverageDistributionOfRead( GATKSAMRecord read, int startLocation, int stopLocation) { int[] coverage = new int[stopLocation - startLocation + 1]; int refLocation = read.getSoftStart(); for (CigarElement cigarElement : read.getCigar().getCigarElements()) { switch (cigarElement.getOperator()) { case S: case M: case EQ: case N: case X: case D: for (int i = 0; i < cigarElement.getLength(); i++) { if (refLocation >= startLocation && refLocation <= stopLocation) { int baseCount = read.isReducedRead() ? read.getReducedCount(refLocation - read.getSoftStart()) : 1; coverage[refLocation - startLocation] += baseCount; // this may be a reduced read, so add the proper number of bases } refLocation++; } break; case P: case I: case H: break; } if (refLocation > stopLocation) break; } return coverage; }
/**
 * Creates a map with each event in the read (cigar operator) and the read coordinates where it
 * happened.
 *
 * <p>For an operator that consumes read bases, every read coordinate covered by the element is
 * recorded. For an operator that does not consume read bases, a single coordinate is recorded: the
 * read position reached when the element is encountered (i.e. the index of the next read base).
 *
 * <p>Example:
 *
 * <pre>
 * D -> 2, 34, 75
 * I -> 55
 * S -> 0, 101
 * H -> 101
 * </pre>
 *
 * @param read the read
 * @return a map with the properties described above. See example
 */
public static Map<CigarOperator, ArrayList<Integer>> getCigarOperatorForAllBases(
    GATKSAMRecord read) {
  Map<CigarOperator, ArrayList<Integer>> events =
      new HashMap<CigarOperator, ArrayList<Integer>>();

  int position = 0; // read coordinate of the next base to be consumed
  for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
    CigarOperator op = cigarElement.getOperator();

    // Both branches need the operator's list, so look it up (or create it) once.
    ArrayList<Integer> list = events.get(op);
    if (list == null) {
      list = new ArrayList<Integer>();
      events.put(op, list);
    }

    if (op.consumesReadBases()) {
      // Iterate once per base of THIS element. The counter must start at 0, not at the running
      // read position, otherwise every element after the first records too few (or no) bases.
      final int length = cigarElement.getLength();
      for (int i = 0; i < length; i++) {
        list.add(position++);
      }
    } else {
      // No read bases are consumed: record where the event sits and leave the position unchanged.
      list.add(position);
    }
  }
  return events;
}
/**
 * Translates a reference coordinate into the corresponding coordinate within the read.
 *
 * <p>WARNING: if the requested reference coordinate happens to fall inside a deletion in the
 * read, the result holds the last read base before the deletion. The returned value is a
 * Pair(int readCoord, boolean fallsInsideDeletion), so the caller can decide which read coordinate
 * to use when faced with a deletion.
 *
 * <p>SUGGESTION: use getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail)
 * instead to get a result that is already pre-processed for normal clipping needs, or use this
 * function and tailor the behavior yourself.
 *
 * <p>This overload delegates to the (soft start, cigar, reference coordinate, flag) overload,
 * passing the read's soft start and cigar and {@code false} for the trailing flag.
 *
 * @param read the read whose soft start and cigar are used for the lookup
 * @param refCoord the reference coordinate to translate
 * @return the read coordinate corresponding to the requested reference coordinate, paired with
 *     whether it falls inside a deletion (see warning!)
 */
@Requires({"refCoord >= read.getSoftStart()", "refCoord <= read.getSoftEnd()"})
@Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(
    GATKSAMRecord read, int refCoord) {
  final int softStart = read.getSoftStart();
  final Pair<Integer, Boolean> coordinateAndDeletionFlag =
      getReadCoordinateForReferenceCoordinate(softStart, read.getCigar(), refCoord, false);
  return coordinateAndDeletionFlag;
}
/**
 * Clipping-oriented variant of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) that
 * takes care of two corner cases:
 *
 * <p>1. When clipping the right tail (end of the read) and the coordinate falls inside a
 * deletion, the base after the deletion is returned. When clipping the left tail (beginning of
 * the read) nothing special is needed, because the previous base is already returned by default.
 *
 * <p>2. When clipping the left tail (beginning of the read), the read starts with an insertion,
 * and the first read-based coordinate is requested, the leading insertion is skipped (because it
 * has the same reference coordinate as the following base).
 *
 * <p>This overload delegates to the (soft start, cigar, reference coordinate, tail, flag)
 * overload, passing the read's soft start and cigar and {@code false} for the trailing flag.
 *
 * @param read the read whose soft start and cigar are used for the lookup
 * @param refCoord the reference coordinate to translate
 * @param tail which tail of the read is being clipped
 * @return the read coordinate corresponding to the requested reference coordinate for clipping.
 */
@Requires({
  "refCoord >= read.getUnclippedStart()",
  "refCoord <= read.getUnclippedEnd() || (read.getUnclippedEnd() < read.getUnclippedStart())"
})
@Ensures({"result >= 0", "result < read.getReadLength()"})
public static int getReadCoordinateForReferenceCoordinate(
    GATKSAMRecord read, int refCoord, ClippingTail tail) {
  final int softStart = read.getSoftStart();
  final int readCoordinate =
      getReadCoordinateForReferenceCoordinate(softStart, read.getCigar(), refCoord, tail, false);
  return readCoordinate;
}
/**
 * Calculates the reference coordinate for a read coordinate.
 *
 * <p>Starting from the read's alignment start, the cigar elements are walked in order. Each
 * element whose operator consumes reference bases advances the reference coordinate by up to its
 * length and reduces the remaining offset by the same amount; elements whose operator does not
 * consume reference bases change neither value.
 *
 * <p>NOTE(review): because only reference-consuming operators reduce the remaining offset,
 * insertions and soft clips do not count against {@code offset} while deletions do. Confirm this
 * is the intended mapping before relying on it for reads with indels or clips.
 *
 * @param read the read
 * @param offset the base in the read (coordinate in the read)
 * @return the reference coordinate correspondent to this base
 * @throws ReviewedStingException if {@code offset} is greater than the read length, or if the
 *     cigar is exhausted before the offset has been fully consumed
 */
public static long getReferenceCoordinateForReadCoordinate(GATKSAMRecord read, int offset) {
  final int readLength = read.getReadLength();
  if (offset > readLength) {
    throw new ReviewedStingException(
        String.format(OFFSET_OUT_OF_BOUNDS_EXCEPTION, offset, readLength));
  }

  long refCoordinate = read.getAlignmentStart();
  int remaining = offset;

  final Iterator<CigarElement> elements = read.getCigar().getCigarElements().iterator();
  while (remaining > 0 && elements.hasNext()) {
    final CigarElement element = elements.next();
    if (!element.getOperator().consumesReferenceBases()) {
      continue; // nothing moves for operators that do not consume the reference
    }
    final int step = Math.min(element.getLength(), remaining);
    refCoordinate += step;
    remaining -= step;
  }

  // The loop only stops with a positive remainder when the cigar has run out of elements.
  if (remaining > 0) {
    throw new ReviewedStingException(OFFSET_NOT_ZERO_EXCEPTION);
  }

  return refCoordinate;
}
/**
 * Tells whether a read starts with an insertion, looking beyond Hard and Soft clips if there are
 * any.
 *
 * <p>Convenience overload: the read's cigar is handed to the cigar-based overload of this method
 * and its answer is returned unchanged.
 *
 * @param read the read whose cigar is inspected
 * @return A pair with the answer (true/false) and the element or null if it doesn't exist
 */
public static Pair<Boolean, CigarElement> readStartsWithInsertion(GATKSAMRecord read) {
  final Pair<Boolean, CigarElement> answer = readStartsWithInsertion(read.getCigar());
  return answer;
}
/**
 * Is this read all insertion?
 *
 * <p>Every element of the read's cigar is checked; the scan stops at the first element whose
 * operator is not an insertion. A cigar with no elements yields {@code true}, since no
 * non-insertion element is found.
 *
 * @param read the read whose cigar elements are examined
 * @return {@code true} if every cigar element is an Insertion, {@code false} otherwise
 */
public static boolean readIsEntirelyInsertion(GATKSAMRecord read) {
  boolean onlyInsertions = true;
  for (final CigarElement element : read.getCigar().getCigarElements()) {
    final boolean isInsertion = element.getOperator() == CigarOperator.INSERTION;
    if (!isInsertion) {
      onlyInsertions = false;
      break;
    }
  }
  return onlyInsertions;
}