public static int getReadCoordinateForReferenceCoordinate( final int alignmentStart, final Cigar cigar, final int refCoord, final ClippingTail tail, final boolean allowGoalNotReached) { Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate( alignmentStart, cigar, refCoord, allowGoalNotReached); int readCoord = result.getFirst(); // Corner case one: clipping the right tail and falls on deletion, move to the next // read coordinate. It is not a problem for the left tail because the default answer // from getReadCoordinateForReferenceCoordinate is to give the previous read coordinate. if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL) readCoord++; // clipping the left tail and first base is insertion, go to the next read coordinate // with the same reference coordinate. Advance to the next cigar element, or to the // end of the read if there is no next element. Pair<Boolean, CigarElement> firstElementIsInsertion = readStartsWithInsertion(cigar); if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion.getFirst()) readCoord = Math.min(firstElementIsInsertion.getSecond().getLength(), cigar.getReadLength() - 1); return readCoord; }
/**
 * Tells whether the first cigar element that is neither a hard clip nor a soft clip is an
 * insertion.
 *
 * @param cigar cigar to inspect
 * @return {@code (true, element)} with the insertion element when the read starts with an
 *     insertion; {@code (false, null)} otherwise, including when the cigar holds only clips or
 *     no elements at all
 */
public static Pair<Boolean, CigarElement> readStartsWithInsertion(final Cigar cigar) {
    for (final CigarElement element : cigar.getCigarElements()) {
        final CigarOperator op = element.getOperator();
        if (op == CigarOperator.INSERTION) {
            return new Pair<Boolean, CigarElement>(true, element);
        }
        // Leading clips are skipped; anything else settles the answer as "no".
        final boolean isClip = op == CigarOperator.HARD_CLIP || op == CigarOperator.SOFT_CLIP;
        if (!isClip) {
            return new Pair<Boolean, CigarElement>(false, null);
        }
    }
    return new Pair<Boolean, CigarElement>(false, null);
}
/**
 * Checks whether the current position sits on the first base of a non-first operator whose
 * preceding cigar element has the given operator.
 *
 * @param cigar cigar being walked
 * @param currentOperatorIndex index of the current element in {@code cigar}
 * @param op operator the previous element must have
 * @param isFirstOp when true the method answers false without looking at the cigar
 * @param isFirstBaseOfOp when false the method answers false without looking at the cigar
 * @return true only if both flags allow it and the element at {@code currentOperatorIndex - 1}
 *     has operator {@code op}
 */
private static boolean isAfterOp(
        final Cigar cigar,
        final int currentOperatorIndex,
        final CigarOperator op,
        final boolean isFirstOp,
        final boolean isFirstBaseOfOp) {
    // Bail out before touching index - 1 unless we are at the start of a later operator.
    if (isFirstOp || !isFirstBaseOfOp) {
        return false;
    }
    final CigarOperator previousOp = cigar.getCigarElement(currentOperatorIndex - 1).getOperator();
    return previousOp == op;
}
public static void main(String[] args) { args = new String[] { "/commun/data/users/cfaucheron/aln_20120329/S0529/data_S0529/S0529_sort.nodup.bam" }; ReferenceSequenceFile rsf = ReferenceSequenceFileFactory.getReferenceSequenceFile( new File("/commun/data/pubdb/ucsc/hg19/chromosomes/hg19.fa")); int count = 0; for (String filename : args) { File file = new File(filename); SAMFileReader samIn = new SAMFileReader(file); SAMRecordIterator r = samIn.iterator(); while (r.hasNext()) { SAMRecord rec = r.next(); if (rec.getReadUnmappedFlag()) continue; if (++count > 10000) break; if (rec.getAlignmentStart() > rec.getAlignmentEnd()) throw new IllegalStateException(); byte bases[] = rsf.getSubsequenceAt( rec.getReferenceName(), rec.getAlignmentStart(), Math.max( rec.getAlignmentEnd(), rec.getAlignmentStart() + rec.getCigar().getPaddedReferenceLength())) .getBases(); Iterator<CigarAlignment> i = CigarAlignment.iterator(rec); /*System.err.println(rec.getCigarString()); System.err.println(bases.length); System.err.println("start:"+rec.getAlignmentStart());*/ StringBuilder s1 = new StringBuilder(); StringBuilder s2 = new StringBuilder(); while (i.hasNext()) { CigarAlignment caln = i.next(); /* System.err.println(rec.getCigarString()); System.err.println("bases.length:"+bases.length); System.err.println("refpos:"+caln.getReferencePosition1()); System.err.println("readpos:"+rec.getAlignmentStart()); */ if (caln.getReferencePosition1() - rec.getAlignmentStart() >= bases.length) { System.out.println("SHORT!"); System.out.println("op:" + caln.getCigarOperator()); System.out.println("read start:" + rec.getAlignmentStart()); System.out.println("clan.pos1:" + caln.getReferencePosition1()); System.out.println("read end:" + rec.getAlignmentEnd()); System.out.println("bases.length:" + bases.length); System.out.println( "getPaddedReferenceLength:" + rec.getCigar().getPaddedReferenceLength()); System.out.println("getReferenceLength:" + rec.getCigar().getReferenceLength()); 
System.out.println("getReadLength:" + rec.getCigar().getReadLength()); System.out.println( "cigar.read.length:" + Cigar.getReadLength(rec.getCigar().getCigarElements())); count = 2000; break; } if (caln.isInsertRef()) { s2.append("-"); s1.append(caln.getReadBase()); } else if (caln.isDeletionRef()) { s2.append((char) bases[caln.getReferencePosition1() - rec.getAlignmentStart()]); s1.append("-"); } else { s2.append((char) bases[caln.getReferencePosition1() - rec.getAlignmentStart()]); s1.append(caln.getReadBase()); } // System.out.println(s1); // System.out.println(s2); // System.out.println(); } System.out.println( rec.getCigarString() + " " + rec.getReferenceName() + ":" + rec.getAlignmentStart()); System.out.println("ref :" + new String(bases)); System.out.println("read:" + new String(rec.getReadBases())); System.out.println(); System.out.println(s1); System.out.println(s2); System.out.println(); } samIn.close(); } }
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate( final int alignmentStart, final Cigar cigar, final int refCoord, final boolean allowGoalNotReached) { int readBases = 0; int refBases = 0; boolean fallsInsideDeletion = false; int goal = refCoord - alignmentStart; // The goal is to move this many reference bases if (goal < 0) { if (allowGoalNotReached) { return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false); } else { throw new ReviewedStingException( "Somehow the requested coordinate is not covered by the read. Too many deletions?"); } } boolean goalReached = refBases == goal; Iterator<CigarElement> cigarElementIterator = cigar.getCigarElements().iterator(); while (!goalReached && cigarElementIterator.hasNext()) { CigarElement cigarElement = cigarElementIterator.next(); int shift = 0; if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) { if (refBases + cigarElement.getLength() < goal) shift = cigarElement.getLength(); else shift = goal - refBases; refBases += shift; } goalReached = refBases == goal; if (!goalReached && cigarElement.getOperator().consumesReadBases()) readBases += cigarElement.getLength(); if (goalReached) { // Is this base's reference position within this cigar element? Or did we use it all? boolean endsWithinCigar = shift < cigarElement.getLength(); // If it isn't, we need to check the next one. There should *ALWAYS* be a next one // since we checked if the goal coordinate is within the read length, so this is just a // sanity check. if (!endsWithinCigar && !cigarElementIterator.hasNext()) { if (allowGoalNotReached) { return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false); } else { throw new ReviewedStingException( "Reference coordinate corresponds to a non-existent base in the read. 
This should never happen -- call Mauricio"); } } CigarElement nextCigarElement; // if we end inside the current cigar element, we just have to check if it is a deletion if (endsWithinCigar) fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION; // if we end outside the current cigar element, we need to check if the next element is an // insertion or deletion. else { nextCigarElement = cigarElementIterator.next(); // if it's an insertion, we need to clip the whole insertion before looking at the next // element if (nextCigarElement.getOperator() == CigarOperator.INSERTION) { readBases += nextCigarElement.getLength(); if (!cigarElementIterator.hasNext()) { if (allowGoalNotReached) { return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false); } else { throw new ReviewedStingException( "Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio"); } } nextCigarElement = cigarElementIterator.next(); } // if it's a deletion, we will pass the information on to be handled downstream. 
fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION; } // If we reached our goal outside a deletion, add the shift if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases()) readBases += shift; // If we reached our goal inside a deletion, but the deletion is the next cigar element then // we need // to add the shift of the current cigar element but go back to it's last element to return // the last // base before the deletion (see warning in function contracts) else if (fallsInsideDeletion && !endsWithinCigar) readBases += shift - 1; // If we reached our goal inside a deletion then we must backtrack to the last base before // the deletion else if (fallsInsideDeletion && endsWithinCigar) readBases--; } } if (!goalReached) { if (allowGoalNotReached) { return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false); } else { throw new ReviewedStingException( "Somehow the requested coordinate is not covered by the read. Alignment " + alignmentStart + " | " + cigar); } } return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion); }