예제 #1
0
  /**
   * Maps a reference coordinate to a read coordinate, adjusted for the tail being clipped.
   *
   * <p>Delegates the raw lookup to the four-argument overload and then applies two corrections:
   * one for a right-tail clip that lands in a deletion, and one for a left-tail clip on a read
   * whose first non-clip cigar element is an insertion.
   *
   * @param alignmentStart reference position the cigar walk starts from
   * @param cigar cigar of the read
   * @param refCoord requested reference coordinate
   * @param tail which end of the read is being clipped
   * @param allowGoalNotReached forwarded to the overload; there it selects between returning
   *     {@code CLIPPING_GOAL_NOT_REACHED} and throwing when the coordinate cannot be located
   * @return the adjusted read coordinate (the overload's value is passed through untouched when
   *     neither correction applies)
   */
  public static int getReadCoordinateForReferenceCoordinate(
      final int alignmentStart,
      final Cigar cigar,
      final int refCoord,
      final ClippingTail tail,
      final boolean allowGoalNotReached) {
    final Pair<Integer, Boolean> lookup =
        getReadCoordinateForReferenceCoordinate(
            alignmentStart, cigar, refCoord, allowGoalNotReached);
    final boolean landedInDeletion = lookup.getSecond();
    int coordinate = lookup.getFirst();

    final Pair<Boolean, CigarElement> leadingInsertion = readStartsWithInsertion(cigar);

    if (tail == ClippingTail.RIGHT_TAIL) {
      // The overload answers with the read base *before* a deletion. That is already right for
      // a left-tail clip, but a right-tail clip has to start one base further along.
      if (landedInDeletion) {
        coordinate++;
      }
    } else if (tail == ClippingTail.LEFT_TAIL && coordinate == 0 && leadingInsertion.getFirst()) {
      // Left-tail clip on a read that opens with an insertion: jump past the insertion to the
      // first base of the following element, capped at the last base of the read when the
      // insertion is all there is.
      final int insertionLength = leadingInsertion.getSecond().getLength();
      final int lastReadIndex = cigar.getReadLength() - 1;
      coordinate = Math.min(insertionLength, lastReadIndex);
    }

    return coordinate;
  }
예제 #2
0
 /**
  * Checks whether the first cigar element that is not a hard or soft clip is an insertion.
  *
  * @param cigar cigar to inspect
  * @return a pair of ({@code true}, the insertion element) when the read starts with an
  *     insertion once leading clips are skipped; otherwise ({@code false}, {@code null})
  */
 public static Pair<Boolean, CigarElement> readStartsWithInsertion(final Cigar cigar) {
   for (final CigarElement element : cigar.getCigarElements()) {
     final CigarOperator operator = element.getOperator();
     if (operator == CigarOperator.INSERTION) {
       return new Pair<Boolean, CigarElement>(true, element);
     }
     final boolean isClip =
         operator == CigarOperator.HARD_CLIP || operator == CigarOperator.SOFT_CLIP;
     if (!isClip) {
       // Any other operator ahead of an insertion means the read does not start with one.
       break;
     }
   }
   return new Pair<Boolean, CigarElement>(false, null);
 }
 /**
  * Tells whether the current position is the first base of a cigar element that directly
  * follows an element with the given operator.
  *
  * @param cigar cigar being walked
  * @param currentOperatorIndex index of the current element within {@code cigar}
  * @param op operator the preceding element must have
  * @param isFirstOp whether the current element is the first one (so nothing precedes it)
  * @param isFirstBaseOfOp whether the position is the first base of the current element
  * @return {@code true} only when there is a preceding element, the position is the first base
  *     of the current element, and the preceding element's operator is {@code op}
  */
 private static boolean isAfterOp(
     final Cigar cigar,
     final int currentOperatorIndex,
     final CigarOperator op,
     final boolean isFirstOp,
     final boolean isFirstBaseOfOp) {
   // The cigar is only consulted once both flags allow it, so index -1 is never requested
   // for the first element.
   if (isFirstOp) {
     return false;
   }
   if (!isFirstBaseOfOp) {
     return false;
   }
   final CigarElement previous = cigar.getCigarElement(currentOperatorIndex - 1);
   return previous.getOperator() == op;
 }
예제 #4
0
  /**
   * Debug driver: walks mapped records of one or more BAM files and prints, for each record,
   * its cigar, the reference and read bases, and a gapped read/reference alignment built from
   * {@code CigarAlignment}.
   *
   * <p>Reference bases are read from a hard-coded hg19 FASTA path. Processing of a file stops
   * once the shared record counter exceeds 10000.
   *
   * @param args BAM file paths; when {@code null} or empty, a hard-coded sample BAM is used
   */
  public static void main(String[] args) {
    // Previously the arguments were unconditionally overwritten; the hard-coded sample is now
    // only a fallback so that paths given on the command line are honoured.
    final String[] inputs =
        (args == null || args.length == 0)
            ? new String[] {
              "/commun/data/users/cfaucheron/aln_20120329/S0529/data_S0529/S0529_sort.nodup.bam"
            }
            : args;
    ReferenceSequenceFile rsf =
        ReferenceSequenceFileFactory.getReferenceSequenceFile(
            new File("/commun/data/pubdb/ucsc/hg19/chromosomes/hg19.fa"));
    int count = 0;
    for (String filename : inputs) {
      File file = new File(filename);
      SAMFileReader samIn = new SAMFileReader(file);
      // try/finally so the reader and its iterator are released even when a record fails.
      try {
        SAMRecordIterator r = samIn.iterator();
        try {
          while (r.hasNext()) {

            SAMRecord rec = r.next();
            if (rec.getReadUnmappedFlag()) continue;

            if (++count > 10000) break;

            if (rec.getAlignmentStart() > rec.getAlignmentEnd()) {
              throw new IllegalStateException();
            }
            // Fetch the reference from the alignment start up to whichever is further: the
            // alignment end or start + padded reference length.
            byte bases[] =
                rsf.getSubsequenceAt(
                        rec.getReferenceName(),
                        rec.getAlignmentStart(),
                        Math.max(
                            rec.getAlignmentEnd(),
                            rec.getAlignmentStart() + rec.getCigar().getPaddedReferenceLength()))
                    .getBases();
            Iterator<CigarAlignment> i = CigarAlignment.iterator(rec);
            StringBuilder s1 = new StringBuilder(); // gapped read line
            StringBuilder s2 = new StringBuilder(); // gapped reference line

            while (i.hasNext()) {
              CigarAlignment caln = i.next();
              if (caln.getReferencePosition1() - rec.getAlignmentStart() >= bases.length) {
                // The fetched reference slice is too short for this position: dump diagnostics
                // and stop aligning this record.
                System.out.println("SHORT!");
                System.out.println("op:" + caln.getCigarOperator());
                System.out.println("read start:" + rec.getAlignmentStart());
                System.out.println("clan.pos1:" + caln.getReferencePosition1());
                System.out.println("read end:" + rec.getAlignmentEnd());
                System.out.println("bases.length:" + bases.length);
                System.out.println(
                    "getPaddedReferenceLength:" + rec.getCigar().getPaddedReferenceLength());
                System.out.println("getReferenceLength:" + rec.getCigar().getReferenceLength());
                System.out.println("getReadLength:" + rec.getCigar().getReadLength());
                System.out.println(
                    "cigar.read.length:" + Cigar.getReadLength(rec.getCigar().getCigarElements()));
                // NOTE(review): this resets the shared record counter to 2000; the intent is
                // not evident from the code -- confirm before relying on the 10000 cap.
                count = 2000;
                break;
              }
              if (caln.isInsertRef()) {
                s2.append("-");
                s1.append(caln.getReadBase());
              } else if (caln.isDeletionRef()) {
                s2.append((char) bases[caln.getReferencePosition1() - rec.getAlignmentStart()]);
                s1.append("-");
              } else {
                s2.append((char) bases[caln.getReferencePosition1() - rec.getAlignmentStart()]);
                s1.append(caln.getReadBase());
              }
            }
            System.out.println(
                rec.getCigarString()
                    + " "
                    + rec.getReferenceName()
                    + ":"
                    + rec.getAlignmentStart());
            System.out.println("ref :" + new String(bases));
            System.out.println("read:" + new String(rec.getReadBases()));
            System.out.println();
            System.out.println(s1);
            System.out.println(s2);
            System.out.println();
          }
        } finally {
          r.close();
        }
      } finally {
        samIn.close();
      }
    }
  }
예제 #5
0
  /**
   * Walks the cigar to find the read coordinate that corresponds to a reference coordinate.
   *
   * <p>Soft clips are counted as if they consumed reference bases during the walk. When the
   * requested coordinate falls inside a deletion, the returned read coordinate is the last read
   * base before that deletion and the second element of the pair is {@code true}.
   *
   * @param alignmentStart reference position the walk starts from (presumably the soft-clip
   *     inclusive start, given that soft clips advance the reference counter -- confirm with
   *     callers)
   * @param cigar cigar of the read
   * @param refCoord requested reference coordinate
   * @param allowGoalNotReached when {@code true}, an unreachable coordinate yields
   *     ({@code CLIPPING_GOAL_NOT_REACHED}, {@code false}) instead of an exception
   * @return pair of (read coordinate, whether the coordinate falls inside a deletion)
   * @throws ReviewedStingException if the coordinate cannot be located and
   *     {@code allowGoalNotReached} is {@code false}
   */
  public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(
      final int alignmentStart,
      final Cigar cigar,
      final int refCoord,
      final boolean allowGoalNotReached) {
    // Number of reference bases to move from the start in order to land on refCoord.
    final int goal = refCoord - alignmentStart;
    if (goal < 0) {
      if (!allowGoalNotReached) {
        throw new ReviewedStingException(
            "Somehow the requested coordinate is not covered by the read. Too many deletions?");
      }
      return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false);
    }
    if (goal == 0) {
      // Nothing to walk: the requested coordinate is the starting position itself.
      return new Pair<Integer, Boolean>(0, false);
    }

    int readOffset = 0;
    int refOffset = 0;

    final Iterator<CigarElement> elements = cigar.getCigarElements().iterator();
    while (elements.hasNext()) {
      final CigarElement current = elements.next();
      final CigarOperator operator = current.getOperator();
      final int length = current.getLength();

      // How many reference bases of this element are used on the way to the goal.
      int consumed = 0;
      if (operator.consumesReferenceBases() || operator == CigarOperator.SOFT_CLIP) {
        consumed = (refOffset + length < goal) ? length : goal - refOffset;
        refOffset += consumed;
      }

      if (refOffset != goal) {
        // Still short of the goal: the whole element lies before it.
        if (operator.consumesReadBases()) {
          readOffset += length;
        }
        continue;
      }

      // Goal reached. Either the target lies inside this element, or the element was used up
      // entirely and the target is the first reference base of what follows.
      final boolean endsInsideElement = consumed < length;
      final boolean insideDeletion;

      if (endsInsideElement) {
        insideDeletion = operator == CigarOperator.DELETION;
      } else {
        // A following element is expected to exist; this is a sanity check.
        if (!elements.hasNext()) {
          if (!allowGoalNotReached) {
            throw new ReviewedStingException(
                "Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
          }
          return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false);
        }
        CigarElement following = elements.next();

        // An insertion right after the current element is skipped in full before looking at
        // the element that actually holds the target.
        if (following.getOperator() == CigarOperator.INSERTION) {
          readOffset += following.getLength();
          if (!elements.hasNext()) {
            if (!allowGoalNotReached) {
              throw new ReviewedStingException(
                  "Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
            }
            return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false);
          }
          following = elements.next();
        }

        // A deletion here is reported to the caller, which handles it downstream.
        insideDeletion = following.getOperator() == CigarOperator.DELETION;
      }

      if (insideDeletion) {
        // Step back to the last read base before the deletion: within a deletion element that
        // is one base back; when the deletion is the next element it is the last base of the
        // current element.
        readOffset += endsInsideElement ? -1 : consumed - 1;
      } else if (operator.consumesReadBases()) {
        readOffset += consumed;
      }

      return new Pair<Integer, Boolean>(readOffset, insideDeletion);
    }

    // Ran out of cigar elements before covering the requested number of reference bases.
    if (!allowGoalNotReached) {
      throw new ReviewedStingException(
          "Somehow the requested coordinate is not covered by the read. Alignment "
              + alignmentStart
              + " | "
              + cigar);
    }
    return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false);
  }