Пример #1
0
  /**
   * Computes how much coverage a single read contributes to each position of a reference region.
   *
   * <p>The walk starts at the read's soft start. Every base of an S, M, EQ, N, X or D cigar
   * element advances the reference position by one and, when that position lies inside the
   * region, adds to the coverage there. All other elements (P, I, H) leave the reference position
   * untouched. DELETIONS are therefore counted as coverage (the main purpose is to downsample
   * reads for variant regions, and deletions count as variants).
   *
   * <p>For reduced reads the amount added is the value returned by {@code getReducedCount};
   * otherwise it is 1.
   *
   * @param read the read to get the coverage distribution of
   * @param startLocation the first reference coordinate of the region (inclusive)
   * @param stopLocation the last reference coordinate of the region (inclusive)
   * @return an array of length {@code stopLocation - startLocation + 1}; entry {@code i} holds the
   *     coverage added by this read at reference position {@code startLocation + i}
   */
  public static int[] getCoverageDistributionOfRead(
      GATKSAMRecord read, int startLocation, int stopLocation) {
    int[] coverage = new int[stopLocation - startLocation + 1];
    int refLocation = read.getSoftStart();
    for (CigarElement element : read.getCigar().getCigarElements()) {
      switch (element.getOperator()) {
        case D:
        case X:
        case N:
        case EQ:
        case M:
        case S:
          {
            final int elementLength = element.getLength();
            for (int consumed = 0; consumed < elementLength; consumed++, refLocation++) {
              if (refLocation < startLocation || refLocation > stopLocation) {
                continue; // outside the requested region; only advance the reference position
              }
              // this may be a reduced read, so add the proper number of bases
              // NOTE(review): the reduced-count lookup is indexed by the reference offset from
              // the soft start - confirm this is intended for reads containing D/N elements.
              int baseCount = 1;
              if (read.isReducedRead()) {
                baseCount = read.getReducedCount(refLocation - read.getSoftStart());
              }
              coverage[refLocation - startLocation] += baseCount;
            }
            break;
          }

        default:
          // P, I and H do not move along the reference
          break;
      }

      // nothing further can land inside the region once we have walked past its end
      if (refLocation > stopLocation) {
        break;
      }
    }
    return coverage;
  }
Пример #2
0
 /**
  * Translates a reference coordinate into the matching read coordinate.
  *
  * <p>WARNING: if the requested reference coordinate happens to fall inside a deletion in the
  * read, this function will return the last read base before the deletion. The result is a
  * Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which read coordinate to
  * use when faced with a deletion.
  *
  * <p>SUGGESTION: Use getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail)
  * instead to get a pre-processed result according to normal clipping needs. Or you can use this
  * function and tailor the behavior to your needs.
  *
  * @param read the read; its soft start and cigar are handed to the delegate overload
  * @param refCoord the reference coordinate to translate; the contract requires it to lie between
  *     the read's soft start and soft end (both inclusive)
  * @return the read coordinate corresponding to the requested reference coordinate, paired with a
  *     flag telling whether the coordinate falls inside a deletion (see warning!)
  */
 @Requires({"refCoord >= read.getSoftStart()", "refCoord <= read.getSoftEnd()"})
 @Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
 public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(
     GATKSAMRecord read, int refCoord) {
   final int softStart = read.getSoftStart();
   // The trailing boolean is always passed as false here; its meaning is defined by the delegate.
   final Pair<Integer, Boolean> coordinateAndDeletionFlag =
       getReadCoordinateForReferenceCoordinate(softStart, read.getCigar(), refCoord, false);
   return coordinateAndDeletionFlag;
 }
Пример #3
0
  /**
   * Returns the mean of a read's reduced read counts, rounded to the nearest integer.
   *
   * @param read the read to inspect
   * @return 1 when the read is not a reduced read; otherwise the sum of its reduced read counts
   *     divided by the number of counts, rounded with {@link Math#round(double)}
   */
  public static int getMeanRepresentativeReadCount(GATKSAMRecord read) {
    if (read.isReducedRead()) {
      // average the per-position counts in floating point, then round
      final byte[] reducedCounts = read.getReducedReadCounts();
      final double mean = (double) MathUtils.sum(reducedCounts) / reducedCounts.length;
      return (int) Math.round(mean);
    }
    return 1;
  }
Пример #4
0
  /**
   * Tells whether a base lies inside the adaptor of the read.
   *
   * <p>There are two cases to treat here:
   *
   * <p>1) Read is on the negative strand => the adaptor boundary is on the left tail, so the base
   * is inside the adaptor when it is at or before the boundary.
   *
   * <p>2) Read is on the positive strand => the adaptor boundary is on the right tail, so the
   * base is inside the adaptor when it is at or after the boundary.
   *
   * <p>Note: false is returned whenever no adaptor boundary is available for the read or its
   * inferred insert size is larger than DEFAULT_ADAPTOR_SIZE (the original documentation
   * attributes these cases to UNMAPPED reads and to unusually large insert sizes, probably due to
   * mismapping or a bigger event).
   *
   * @param read the read to test
   * @param basePos base position in REFERENCE coordinates (not read coordinates)
   * @return whether or not the base is in the adaptor
   */
  public static boolean isBaseInsideAdaptor(final GATKSAMRecord read, long basePos) {
    final Integer boundary = getAdaptorBoundary(read);
    if (boundary == null) {
      return false;
    }
    if (read.getInferredInsertSize() > DEFAULT_ADAPTOR_SIZE) {
      return false;
    }

    if (read.getReadNegativeStrandFlag()) {
      return basePos <= boundary;
    }
    return basePos >= boundary;
  }
Пример #5
0
 /**
  * Clipping-oriented variant of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int).
  *
  * <p>According to the original documentation, the result is pre-processed to take care of two
  * corner cases:
  *
  * <p>1. When clipping the right tail (end of the read) and the coordinate falls inside a
  * deletion, the base after the deletion is returned. When clipping the left tail (beginning of
  * the read) nothing special is needed because the previous base is already returned by default.
  *
  * <p>2. When clipping the left tail (beginning of the read), the read starts with an insertion
  * and the first read-based coordinate is requested, the leading insertion is skipped (because
  * it has the same reference coordinate as the following base).
  *
  * @param read the read; its soft start and cigar are handed to the delegate overload
  * @param refCoord the reference coordinate to translate
  * @param tail which tail of the read is being clipped
  * @return the read coordinate corresponding to the requested reference coordinate for clipping.
  */
 @Requires({
   "refCoord >= read.getUnclippedStart()",
   "refCoord <= read.getUnclippedEnd() || (read.getUnclippedEnd() < read.getUnclippedStart())"
 })
 @Ensures({"result >= 0", "result < read.getReadLength()"})
 public static int getReadCoordinateForReferenceCoordinate(
     GATKSAMRecord read, int refCoord, ClippingTail tail) {
   final int softStart = read.getSoftStart();
   // The trailing boolean is always passed as false here; its meaning is defined by the delegate.
   final int readCoordinate =
       getReadCoordinateForReferenceCoordinate(softStart, read.getCigar(), refCoord, tail, false);
   return readCoordinate;
 }
 /**
  * Creates the artificial read used by the adaptor tests: eight bases ("ACGTACGT") with quality
  * 30 and cigar "8M", flagged as paired and properly paired.
  *
  * @param fragmentSize value stored as the read's inferred insert size
  * @param mateStart value stored as the mate's alignment start
  * @return the freshly created read
  */
 private GATKSAMRecord makeRead(final int fragmentSize, final int mateStart) {
   final String eightMatches = "8M";
   final byte[] qualities = {30, 30, 30, 30, 30, 30, 30, 30};
   final byte[] sequence = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};

   final GATKSAMRecord pairedRead =
       ArtificialSAMUtils.createArtificialRead(sequence, qualities, eightMatches);

   // pairing flags
   pairedRead.setProperPairFlag(true);
   pairedRead.setReadPairedFlag(true);

   // mate placement and fragment size
   pairedRead.setMateAlignmentStart(mateStart);
   pairedRead.setInferredInsertSize(fragmentSize);
   return pairedRead;
 }
 /**
  * Round-trip check for ReadUtils.getBasesReverseComplement: for 1000 random reads of random
  * length below 1000, complementing and reversing the returned string again must give back the
  * read's original bases.
  */
 @Test(enabled = true)
 public void testGetBasesReverseComplement() {
   final Random random = GenomeAnalysisEngine.getRandomGenerator();
   for (int remaining = 1000; remaining > 0; remaining--) {
     final int length = random.nextInt(1000);
     final GATKSAMRecord read = GATKSAMRecord.createRandomRead(length);
     final byte[] original = read.getReadBases();
     final String revComp = ReadUtils.getBasesReverseComplement(read);

     // undo the transformation: complement each base and write it at the mirrored index
     final byte[] reconverted = new byte[length];
     for (int source = 0, target = length - 1; source < length; source++, target--) {
       reconverted[target] = BaseUtils.getComplement((byte) revComp.charAt(source));
     }
     Assert.assertEquals(reconverted, original);
   }
 }
Пример #8
0
  /**
   * Creates a map from each cigar operator present in the read to the read coordinates where it
   * occurs.
   *
   * <p>For an operator that consumes read bases, every read coordinate covered by its elements is
   * recorded. For an operator that does not consume read bases, a single coordinate is recorded
   * per element: the coordinate of the next read base at that point of the cigar.
   *
   * <p>Example: for the cigar 2S3M1D2I the result is S -> 0, 1; M -> 2, 3, 4; D -> 5; I -> 5, 6
   *
   * @param read the read
   * @return a map with the properties described above. See example
   */
  public static Map<CigarOperator, ArrayList<Integer>> getCigarOperatorForAllBases(
      GATKSAMRecord read) {
    Map<CigarOperator, ArrayList<Integer>> events =
        new HashMap<CigarOperator, ArrayList<Integer>>();

    int position = 0; // read coordinate of the next base to be consumed
    for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
      CigarOperator op = cigarElement.getOperator();

      // fetch (or lazily create) the coordinate list of this operator
      ArrayList<Integer> list = events.get(op);
      if (list == null) {
        list = new ArrayList<Integer>();
        events.put(op, list);
      }

      if (op.consumesReadBases()) {
        // The counter must start at 0 for every element. Starting it at the running read
        // position (as before) made every element after the first record too few coordinates,
        // or none at all, and left 'position' out of sync with the read.
        final int length = cigarElement.getLength();
        for (int i = 0; i < length; i++) {
          list.add(position++);
        }
      } else {
        // no read base is consumed: record where in the read the event sits
        list.add(position);
      }
    }
    return events;
  }
  /**
   * Checks getReadCoordinateForReferenceCoordinateUpToEndOfRead on a read whose cigar contains a
   * long N element followed by a deletion.
   *
   * <p>With alignment start 8975 and cigar 3M414N1D73M, the first three bases cover 8975-8977,
   * the N element covers 8978-9391 and the deletion covers 9392, so reference coordinate 9393 is
   * the first base after the deletion, i.e. read coordinate 3.
   */
  @Test(enabled = true)
  public void testReadWithNsRefAfterDeletion() throws FileNotFoundException {
    final int readLength = 76;
    final int requestedRefCoord = 9393;
    final int expectedReadCoord = 3;

    final IndexedFastaSequenceFile reference =
        new CachingIndexedFastaSequenceFile(new File(b37KGReference));
    final SAMFileHeader header =
        ArtificialSAMUtils.createArtificialSamHeader(reference.getSequenceDictionary());

    // all-'A' read with uniform quality 30
    final GATKSAMRecord read =
        ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 8975, readLength);
    read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
    read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read.setCigarString("3M414N1D73M");

    final int actualReadCoord =
        ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(
            read, requestedRefCoord, ReadUtils.ClippingTail.LEFT_TAIL);
    Assert.assertEquals(actualReadCoord, expectedReadCoord);
  }
Пример #10
0
  /**
   * Calculates the reference coordinate for a read coordinate.
   *
   * <p>Starting at the read's alignment start, the cigar is walked element by element. Each
   * element that consumes reference bases moves the location forward by the smaller of its length
   * and the offset still to be covered, and reduces the remaining offset by the same amount.
   *
   * <p>NOTE(review): elements that do not consume reference bases neither move the location nor
   * reduce the remaining offset - confirm this is the intended treatment of insertions and clips.
   *
   * @param read the read
   * @param offset the base in the read (coordinate in the read)
   * @return the reference coordinate corresponding to this base
   * @throws ReviewedStingException if the offset is larger than the read length, or if part of
   *     the offset is still uncovered after the whole cigar has been walked
   */
  public static long getReferenceCoordinateForReadCoordinate(GATKSAMRecord read, int offset) {
    if (offset > read.getReadLength()) {
      throw new ReviewedStingException(
          String.format(OFFSET_OUT_OF_BOUNDS_EXCEPTION, offset, read.getReadLength()));
    }

    long location = read.getAlignmentStart();
    int remaining = offset;
    for (CigarElement element : read.getCigar().getCigarElements()) {
      if (remaining <= 0) {
        break; // the whole offset has been covered
      }
      if (element.getOperator().consumesReferenceBases()) {
        final int step = Math.min(element.getLength(), remaining);
        location += step;
        remaining -= step;
      }
    }

    // the cigar ran out before the offset did
    if (remaining > 0) {
      throw new ReviewedStingException(OFFSET_NOT_ZERO_EXCEPTION);
    }
    return location;
  }
Пример #11
0
  /**
   * Classifies the position of a read relative to an interval.
   *
   * <p>The checks run in this order: different contig; unclipped end before the interval;
   * unclipped start after the interval; soft end before the interval (reported as hard-clipped
   * left); soft start after the interval (reported as hard-clipped right). If none applies, the
   * soft start and soft end decide between contained, overlapping on both sides, overlapping on
   * the left and overlapping on the right.
   *
   * @param read the read
   * @param interval the interval
   * @return the overlap type as described by the ReadAndIntervalOverlap enum
   */
  public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(
      GATKSAMRecord read, GenomeLoc interval) {

    final int softStart = read.getSoftStart();
    final int softStop = read.getSoftEnd();
    final int unclippedStart = read.getUnclippedStart();
    final int unclippedStop = read.getUnclippedEnd();

    if (!read.getReferenceName().equals(interval.getContig())) {
      return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
    }

    // no overlap at all, even counting every clipped base
    if (unclippedStop < interval.getStart()) {
      return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;
    }
    if (unclippedStart > interval.getStop()) {
      return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;
    }

    // the unclipped span reaches the interval but the soft span does not
    if (softStop < interval.getStart()) {
      return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT;
    }
    if (softStart > interval.getStop()) {
      return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT;
    }

    // genuine overlap: work out on which side(s) the read sticks out of the interval
    final boolean startsBeforeInterval = softStart < interval.getStart();
    final boolean endsAfterInterval = softStop > interval.getStop();
    if (!startsBeforeInterval && !endsAfterInterval) {
      return ReadAndIntervalOverlap.OVERLAP_CONTAINED;
    }
    if (startsBeforeInterval && endsAfterInterval) {
      return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;
    }
    return startsBeforeInterval
        ? ReadAndIntervalOverlap.OVERLAP_LEFT
        : ReadAndIntervalOverlap.OVERLAP_RIGHT;
  }
Пример #12
0
  // copied from LocusViewTemplate
  /**
   * Builds a one-base record (cigar "1M", base "A", quality string "A") using this object's
   * header, sequence dictionary and read group.
   *
   * @param readName name given to the record
   * @param contig contig name, resolved to a reference index through the dictionary
   * @param alignmentStart alignment start given to the record
   * @return the new record
   */
  protected GATKSAMRecord buildSAMRecord(
      final String readName, final String contig, final int alignmentStart) {
    final GATKSAMRecord singleBaseRecord = new GATKSAMRecord(header);

    // identity and placement
    singleBaseRecord.setReadName(readName);
    singleBaseRecord.setReferenceIndex(dictionary.getSequenceIndex(contig));
    singleBaseRecord.setAlignmentStart(alignmentStart);

    // minimal single-base payload
    singleBaseRecord.setCigarString("1M");
    singleBaseRecord.setReadString("A");
    singleBaseRecord.setBaseQualityString("A");
    singleBaseRecord.setReadGroup(readGroup);

    return singleBaseRecord;
  }
  /**
   * Data provider "HasWellDefinedFragmentSizeData".
   *
   * <p>Every row is {description, read, expected boolean}. The first row is a clone of a template
   * read; each following row is a clone of that template with one or a few properties changed.
   * (The boolean is presumably the expected outcome of the well-defined-fragment-size check in
   * the consuming test - confirm there.)
   */
  @DataProvider(name = "HasWellDefinedFragmentSizeData")
  public Object[][] makeHasWellDefinedFragmentSizeData() throws Exception {
    final List<Object[]> rows = new LinkedList<Object[]>();

    // template: paired, proper pair, both mates mapped, read on the positive strand starting at
    // 100 with cigar 50M, mate on the negative strand starting at 130, insert size 80
    final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader();
    final GATKSAMRecord template =
        ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, 10);
    template.setReadPairedFlag(true);
    template.setProperPairFlag(true);
    template.setReadUnmappedFlag(false);
    template.setMateUnmappedFlag(false);
    template.setAlignmentStart(100);
    template.setCigarString("50M");
    template.setMateAlignmentStart(130);
    template.setInferredInsertSize(80);
    template.setFirstOfPairFlag(true);
    template.setReadNegativeStrandFlag(false);
    template.setMateNegativeStrandFlag(true);

    rows.add(new Object[] {"basic case", template.clone(), true});

    final GATKSAMRecord unpaired = (GATKSAMRecord) template.clone();
    unpaired.setReadPairedFlag(false);
    rows.add(new Object[] {"not paired", unpaired, false});

    // we currently don't require the proper pair flag to be set, hence the expected 'true'
    final GATKSAMRecord improperPair = (GATKSAMRecord) template.clone();
    improperPair.setProperPairFlag(false);
    rows.add(new Object[] {"not proper pair", improperPair, true});

    final GATKSAMRecord unmappedRead = (GATKSAMRecord) template.clone();
    unmappedRead.setReadUnmappedFlag(true);
    rows.add(new Object[] {"read is unmapped", unmappedRead, false});

    final GATKSAMRecord unmappedMate = (GATKSAMRecord) template.clone();
    unmappedMate.setMateUnmappedFlag(true);
    rows.add(new Object[] {"mate is unmapped", unmappedMate, false});

    final GATKSAMRecord bothPositive = (GATKSAMRecord) template.clone();
    bothPositive.setMateNegativeStrandFlag(false);
    rows.add(new Object[] {"read and mate both on positive strand", bothPositive, false});

    final GATKSAMRecord bothNegative = (GATKSAMRecord) template.clone();
    bothNegative.setReadNegativeStrandFlag(true);
    rows.add(new Object[] {"read and mate both on negative strand", bothNegative, false});

    final GATKSAMRecord zeroInsertSize = (GATKSAMRecord) template.clone();
    zeroInsertSize.setInferredInsertSize(0);
    rows.add(new Object[] {"insert size is 0", zeroInsertSize, false});

    final GATKSAMRecord startsAfterMateEnd = (GATKSAMRecord) template.clone();
    startsAfterMateEnd.setAlignmentStart(1000);
    rows.add(new Object[] {"positve read starts after mate end", startsAfterMateEnd, false});

    final GATKSAMRecord endsBeforeMateStart = (GATKSAMRecord) template.clone();
    endsBeforeMateStart.setReadNegativeStrandFlag(true);
    endsBeforeMateStart.setMateNegativeStrandFlag(false);
    endsBeforeMateStart.setMateAlignmentStart(1000);
    rows.add(
        new Object[] {"negative strand read ends before mate starts", endsBeforeMateStart, false});

    return rows.toArray(new Object[rows.size()][]);
  }
Пример #14
0
 /**
  * Is this read all insertion?
  *
  * <p>Note that a cigar without any element also yields true, because no element contradicts
  * the condition.
  *
  * @param read the read whose cigar is inspected
  * @return true when no cigar element has an operator other than Insertion
  */
 public static boolean readIsEntirelyInsertion(GATKSAMRecord read) {
   boolean onlyInsertions = true;
   for (CigarElement element : read.getCigar().getCigarElements()) {
     if (element.getOperator() != CigarOperator.INSERTION) {
       onlyInsertions = false;
       break; // one non-insertion element settles the answer
     }
   }
   return onlyInsertions;
 }
Пример #15
0
 /**
  * Is a base inside a read?
  *
  * @param read the read to evaluate
  * @param referenceCoordinate the reference coordinate of the base to test
  * @return true if the coordinate lies between the read's alignment start and alignment end
  *     (both inclusive), false otherwise.
  */
 public static boolean isInsideRead(final GATKSAMRecord read, final int referenceCoordinate) {
   if (referenceCoordinate < read.getAlignmentStart()) {
     return false; // lies before the first aligned base
   }
   return referenceCoordinate <= read.getAlignmentEnd();
 }
  /**
   * Exercises an adaptor-boundary getter (supplied by the "AdaptorGetter" data provider) over a
   * series of read/mate configurations.
   *
   * <p>Every case starts from a fresh read built by makeRead(fragmentSize, mateStart) and then
   * adjusts alignment start, strand flags, insert size or pairing flags before calling the getter.
   *
   * @param get the getter under test; get.getAdaptor(read) returns the boundary or
   *     ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY
   */
  @Test(dataProvider = "AdaptorGetter")
  public void testGetAdaptorBoundary(final GetAdaptorFunc get) {
    final int fragmentSize = 10;
    final int mateStart = 1000;
    // read starts two bases before / after the mate start
    final int BEFORE = mateStart - 2;
    final int AFTER = mateStart + 2;
    int myStart, boundary;
    GATKSAMRecord read;

    // Test case 1: positive strand, first read
    // expected boundary: read start + fragment size + 1
    read = makeRead(fragmentSize, mateStart);
    myStart = BEFORE;
    read.setAlignmentStart(myStart);
    read.setReadNegativeStrandFlag(false);
    read.setMateNegativeStrandFlag(true);
    boundary = get.getAdaptor(read);
    Assert.assertEquals(boundary, myStart + fragmentSize + 1);

    // Test case 2: positive strand, second read
    // same expectation as case 1, with the read starting after the mate start
    read = makeRead(fragmentSize, mateStart);
    myStart = AFTER;
    read.setAlignmentStart(myStart);
    read.setReadNegativeStrandFlag(false);
    read.setMateNegativeStrandFlag(true);
    boundary = get.getAdaptor(read);
    Assert.assertEquals(boundary, myStart + fragmentSize + 1);

    // Test case 3: negative strand, second read
    // expected boundary: one base before the mate start
    read = makeRead(fragmentSize, mateStart);
    myStart = AFTER;
    read.setAlignmentStart(myStart);
    read.setReadNegativeStrandFlag(true);
    read.setMateNegativeStrandFlag(false);
    boundary = get.getAdaptor(read);
    Assert.assertEquals(boundary, mateStart - 1);

    // Test case 4: negative strand, first read
    // same expectation as case 3, with the read starting before the mate start
    read = makeRead(fragmentSize, mateStart);
    myStart = BEFORE;
    read.setAlignmentStart(myStart);
    read.setReadNegativeStrandFlag(true);
    read.setMateNegativeStrandFlag(false);
    boundary = get.getAdaptor(read);
    Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY == boundary ? boundary : mateStart - 1);

    // Test case 5: mate is mapped to another chromosome (test both strands)
    // modelled here by an inferred insert size of 0
    read = makeRead(fragmentSize, mateStart);
    read.setInferredInsertSize(0);
    read.setReadNegativeStrandFlag(true);
    read.setMateNegativeStrandFlag(false);
    boundary = get.getAdaptor(read);
    Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
    read.setReadNegativeStrandFlag(false);
    read.setMateNegativeStrandFlag(true);
    boundary = get.getAdaptor(read);
    Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
    // restores the insert size; the read is replaced by a fresh one right below
    read.setInferredInsertSize(10);

    // Test case 6: read is unmapped
    read = makeRead(fragmentSize, mateStart);
    read.setReadUnmappedFlag(true);
    boundary = get.getAdaptor(read);
    Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
    // restores the flag; the read is replaced by a fresh one right below
    read.setReadUnmappedFlag(false);

    // Test case 7:  reads don't overlap and look like this:
    //    <--------|
    //                 |------>
    // first read:
    read = makeRead(fragmentSize, mateStart);
    myStart = 980;
    read.setAlignmentStart(myStart);
    read.setInferredInsertSize(20);
    read.setReadNegativeStrandFlag(true);
    boundary = get.getAdaptor(read);
    Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);

    // second read:
    read = makeRead(fragmentSize, mateStart);
    myStart = 1000;
    read.setAlignmentStart(myStart);
    read.setInferredInsertSize(20);
    read.setMateAlignmentStart(980);
    read.setReadNegativeStrandFlag(false);
    boundary = get.getAdaptor(read);
    Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);

    // Test case 8: read doesn't have proper pair flag set
    read = makeRead(fragmentSize, mateStart);
    read.setReadPairedFlag(true);
    read.setProperPairFlag(false);
    Assert.assertEquals(get.getAdaptor(read), ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);

    // Test case 9: read and mate have same negative flag setting
    // for each strand value: opposite strands must yield a boundary, identical strands must not
    for (final boolean negFlag : Arrays.asList(true, false)) {
      // read and mate on opposite strands
      read = makeRead(fragmentSize, mateStart);
      read.setAlignmentStart(BEFORE);
      read.setReadPairedFlag(true);
      read.setProperPairFlag(true);
      read.setReadNegativeStrandFlag(negFlag);
      read.setMateNegativeStrandFlag(!negFlag);
      Assert.assertTrue(
          get.getAdaptor(read) != ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY,
          "Get adaptor should have succeeded");

      // read and mate on the same strand
      read = makeRead(fragmentSize, mateStart);
      read.setAlignmentStart(BEFORE);
      read.setReadPairedFlag(true);
      read.setProperPairFlag(true);
      read.setReadNegativeStrandFlag(negFlag);
      read.setMateNegativeStrandFlag(negFlag);
      Assert.assertEquals(
          get.getAdaptor(read),
          ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY,
          "Get adaptor should have failed for reads with bad alignment orientation");
    }
  }
Пример #17
0
 /**
  * Checks if a read starts with an insertion. It looks beyond Hard and Soft clips if there are
  * any.
  *
  * <p>Convenience overload: the read's cigar is handed to the cigar-based overload.
  *
  * @param read the read whose cigar is inspected
  * @return A pair with the answer (true/false) and the element or null if it doesn't exist
  */
 public static Pair<Boolean, CigarElement> readStartsWithInsertion(GATKSAMRecord read) {
   final Pair<Boolean, CigarElement> answerAndElement = readStartsWithInsertion(read.getCigar());
   return answerAndElement;
 }