Ejemplo n.º 1
0
  /**
   * Checks if a read contains adaptor sequences. If it does, hard clips them out.
   *
   * <p>Note: To see how a read is checked for adaptor sequence see ReadUtils.getAdaptorBoundary()
   *
   * @return a new read without adaptor sequence
   */
  private GATKSAMRecord hardClipAdaptorSequence() {
    final int adaptorBoundary = ReadUtils.getAdaptorBoundary(read);

    if (adaptorBoundary == ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY
        || !ReadUtils.isInsideRead(read, adaptorBoundary)) return read;

    return read.getReadNegativeStrandFlag()
        ? hardClipByReferenceCoordinatesLeftTail(adaptorBoundary)
        : hardClipByReferenceCoordinatesRightTail(adaptorBoundary);
  }
Ejemplo n.º 2
0
  /**
   * Generic functionality to hard clip a read, used internally by
   * hardClipByReferenceCoordinatesLeftTail and hardClipByReferenceCoordinatesRightTail. Should not
   * be used directly.
   *
   * <p>Note, it REQUIRES you to give the directionality of your hard clip (i.e. whether you're
   * clipping the left of right tail) by specifying either refStart < 0 or refStop < 0.
   *
   * @param refStart first base to clip (inclusive)
   * @param refStop last base to clip (inclusive)
   * @return a new read, without the clipped bases
   */
  @Requires({
    "!read.getReadUnmappedFlag()",
    "refStart < 0 || refStop < 0"
  }) // can't handle unmapped reads, as we're using reference coordinates to clip
  protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
    if (read.isEmpty()) return read;

    int start;
    int stop;

    // Determine the read coordinate to start and stop hard clipping
    if (refStart < 0) {
      if (refStop < 0)
        throw new ReviewedStingException(
            "Only one of refStart or refStop must be < 0, not both ("
                + refStart
                + ", "
                + refStop
                + ")");
      start = 0;
      stop =
          ReadUtils.getReadCoordinateForReferenceCoordinate(
              read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
    } else {
      if (refStop >= 0)
        throw new ReviewedStingException(
            "Either refStart or refStop must be < 0 (" + refStart + ", " + refStop + ")");
      start =
          ReadUtils.getReadCoordinateForReferenceCoordinate(
              read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
      stop = read.getReadLength() - 1;
    }

    if (start < 0 || stop > read.getReadLength() - 1)
      throw new ReviewedStingException(
          "Trying to clip before the start or after the end of a read");

    if (start > stop)
      throw new ReviewedStingException(
          String.format(
              "START (%d) > (%d) STOP -- this should never happen, please check read: %s (CIGAR: %s)",
              start, stop, read, read.getCigarString()));

    if (start > 0 && stop < read.getReadLength() - 1)
      throw new ReviewedStingException(
          String.format(
              "Trying to clip the middle of the read: start %d, stop %d, cigar: %s",
              start, stop, read.getCigarString()));

    this.addOp(new ClippingOp(start, stop));
    GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
    this.ops = null;
    return clippedRead;
  }
  @Test(enabled = true)
  public void testGetMaxReadLength() {
    for (final int minLength : Arrays.asList(5, 30, 50)) {
      for (final int maxLength : Arrays.asList(50, 75, 100)) {
        final List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
        for (int readLength = minLength; readLength <= maxLength; readLength++) {
          reads.add(ReadUtils.createRandomRead(readLength));
        }
        Assert.assertEquals(
            ReadUtils.getMaxReadLength(reads), maxLength, "max length does not match");
      }
    }

    final List<GATKSAMRecord> reads = new LinkedList<GATKSAMRecord>();
    Assert.assertEquals(
        ReadUtils.getMaxReadLength(reads), 0, "Empty list should have max length of zero");
  }
 private static void runTest(
     final GATKSAMReadGroupRecord rg, final String expected, final ReadGroupCovariate covariate) {
   GATKSAMRecord read = ReadUtils.createRandomRead(10);
   read.setReadGroup(rg);
   ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
   covariate.recordValues(read, readCovariates);
   verifyCovariateArray(readCovariates.getMismatchesKeySet(), expected, covariate);
 }
 @Test(enabled = true)
 public void testGetBasesReverseComplement() {
   int iterations = 1000;
   Random random = GenomeAnalysisEngine.getRandomGenerator();
   while (iterations-- > 0) {
     final int l = random.nextInt(1000);
     GATKSAMRecord read = GATKSAMRecord.createRandomRead(l);
     byte[] original = read.getReadBases();
     byte[] reconverted = new byte[l];
     String revComp = ReadUtils.getBasesReverseComplement(read);
     for (int i = 0; i < l; i++) {
       reconverted[l - 1 - i] = BaseUtils.getComplement((byte) revComp.charAt(i));
     }
     Assert.assertEquals(reconverted, original);
   }
 }
  @Test(enabled = true)
  public void testReadWithNsRefAfterDeletion() throws FileNotFoundException {

    final IndexedFastaSequenceFile seq =
        new CachingIndexedFastaSequenceFile(new File(b37KGReference));
    final SAMFileHeader header =
        ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
    final int readLength = 76;

    final GATKSAMRecord read =
        ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 8975, readLength);
    read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
    read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read.setCigarString("3M414N1D73M");

    final int result =
        ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(
            read, 9393, ReadUtils.ClippingTail.LEFT_TAIL);
    Assert.assertEquals(result, 3);
  }
  private ArrayList<Allele> computeConsensusAlleles(
      ReferenceContext ref,
      Map<String, AlignmentContext> contexts,
      AlignmentContextUtils.ReadOrientation contextType) {
    Allele refAllele = null, altAllele = null;
    GenomeLoc loc = ref.getLocus();
    ArrayList<Allele> aList = new ArrayList<Allele>();

    HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>();

    int insCount = 0, delCount = 0;
    // quick check of total number of indels in pileup
    for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
      AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);

      final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
      insCount += indelPileup.getNumberOfInsertions();
      delCount += indelPileup.getNumberOfDeletions();
    }

    if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
      return aList;

    for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
      // todo -- warning, can be duplicating expensive partition here
      AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);

      final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();

      for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
        // SAMRecord read = p.getRead();
        GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
        if (read == null) continue;
        if (ReadUtils.is454Read(read)) {
          continue;
        }

        /*                if (DEBUG && p.isIndel()) {
                         System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
                                 read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(),
                                 p.getEventLength(),p.getType().toString(), p.getEventBases());
                     }
        */

        String indelString = p.getEventBases();
        if (p.isInsertion()) {
          boolean foundKey = false;
          if (read.getAlignmentEnd() == loc.getStart()) {
            // first corner condition: a read has an insertion at the end, and we're right at the
            // insertion.
            // In this case, the read could have any of the inserted bases and we need to build a
            // consensus
            for (String s : consensusIndelStrings.keySet()) {
              int cnt = consensusIndelStrings.get(s);
              if (s.startsWith(indelString)) {
                // case 1: current insertion is prefix of indel in hash map
                consensusIndelStrings.put(s, cnt + 1);
                foundKey = true;
                break;
              } else if (indelString.startsWith(s)) {
                // case 2: indel stored in hash table is prefix of current insertion
                // In this case, new bases are new key.
                consensusIndelStrings.remove(s);
                consensusIndelStrings.put(indelString, cnt + 1);
                foundKey = true;
                break;
              }
            }
            if (!foundKey)
              // none of the above: event bases not supported by previous table, so add new key
              consensusIndelStrings.put(indelString, 1);

          } else if (read.getAlignmentStart() == loc.getStart() + 1) {
            // opposite corner condition: read will start at current locus with an insertion
            for (String s : consensusIndelStrings.keySet()) {
              int cnt = consensusIndelStrings.get(s);
              if (s.endsWith(indelString)) {
                // case 1: current insertion is suffix of indel in hash map
                consensusIndelStrings.put(s, cnt + 1);
                foundKey = true;
                break;
              } else if (indelString.endsWith(s)) {
                // case 2: indel stored in hash table is suffix of current insertion
                // In this case, new bases are new key.

                consensusIndelStrings.remove(s);
                consensusIndelStrings.put(indelString, cnt + 1);
                foundKey = true;
                break;
              }
            }
            if (!foundKey)
              // none of the above: event bases not supported by previous table, so add new key
              consensusIndelStrings.put(indelString, 1);

          } else {
            // normal case: insertion somewhere in the middle of a read: add count to hash map
            int cnt =
                consensusIndelStrings.containsKey(indelString)
                    ? consensusIndelStrings.get(indelString)
                    : 0;
            consensusIndelStrings.put(indelString, cnt + 1);
          }

        } else if (p.isDeletion()) {
          indelString = String.format("D%d", p.getEventLength());
          int cnt =
              consensusIndelStrings.containsKey(indelString)
                  ? consensusIndelStrings.get(indelString)
                  : 0;
          consensusIndelStrings.put(indelString, cnt + 1);
        }
      }

      /*            if (DEBUG) {
          int icount = indelPileup.getNumberOfInsertions();
          int dcount = indelPileup.getNumberOfDeletions();
          if (icount + dcount > 0)
          {
              List<Pair<String,Integer>> eventStrings = indelPileup.getEventStringsWithCounts(ref.getBases());
              System.out.format("#ins: %d, #del:%d\n", insCount, delCount);

              for (int i=0 ; i < eventStrings.size() ; i++ ) {
                  System.out.format("%s:%d,",eventStrings.get(i).first,eventStrings.get(i).second);
                  //                int k=0;
              }
              System.out.println();
          }
      }             */
    }

    int maxAlleleCnt = 0;
    String bestAltAllele = "";
    for (String s : consensusIndelStrings.keySet()) {
      int curCnt = consensusIndelStrings.get(s);
      if (curCnt > maxAlleleCnt) {
        maxAlleleCnt = curCnt;
        bestAltAllele = s;
      }
      //            if (DEBUG)
      //                System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s)  );
    } // gdebug-

    if (maxAlleleCnt < minIndelCountForGenotyping) return aList;

    if (bestAltAllele.startsWith("D")) {
      // get deletion length
      int dLen = Integer.valueOf(bestAltAllele.substring(1));
      // get ref bases of accurate deletion
      int startIdxInReference = (int) (1 + loc.getStart() - ref.getWindow().getStart());

      // System.out.println(new String(ref.getBases()));
      byte[] refBases =
          Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);

      if (Allele.acceptableAlleleBases(refBases)) {
        refAllele = Allele.create(refBases, true);
        altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
      }
    } else {
      // insertion case
      if (Allele.acceptableAlleleBases(bestAltAllele)) {
        refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
        altAllele = Allele.create(bestAltAllele, false);
      }
    }
    if (refAllele != null && altAllele != null) {
      aList.add(0, refAllele);
      aList.add(1, altAllele);
    }
    return aList;
  }
 @Test(dataProvider = "HasWellDefinedFragmentSizeData")
 private void testHasWellDefinedFragmentSize(
     final String name, final GATKSAMRecord read, final boolean expected) {
   Assert.assertEquals(ReadUtils.hasWellDefinedFragmentSize(read), expected);
 }
Ejemplo n.º 9
0
  @Test(enabled = false)
  public void testCovariateGeneration() {
    final String RGID = "id";
    final int length = 10;
    final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
    GATKSAMRecord read = ReadUtils.createRandomRead(length, false);
    GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(RGID);
    rg.setPlatform("illumina");
    read.setReadGroup(rg);
    final byte[] mQuals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);
    final byte[] iQuals = read.getBaseQualities(EventType.BASE_INSERTION);
    final byte[] dQuals = read.getBaseQualities(EventType.BASE_DELETION);

    ReadGroupCovariate rgCov = new ReadGroupCovariate();
    QualityScoreCovariate qsCov = new QualityScoreCovariate();
    ContextCovariate coCov = new ContextCovariate();
    CycleCovariate cyCov = new CycleCovariate();

    rgCov.initialize(RAC);
    qsCov.initialize(RAC);
    coCov.initialize(RAC);
    cyCov.initialize(RAC);

    Covariate[] requestedCovariates = new Covariate[4];
    requestedCovariates[0] = rgCov;
    requestedCovariates[1] = qsCov;
    requestedCovariates[2] = coCov;
    requestedCovariates[3] = cyCov;

    ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);

    // check that the length is correct
    Assert.assertEquals(rc.getMismatchesKeySet().length, length);
    Assert.assertEquals(rc.getInsertionsKeySet().length, length);
    Assert.assertEquals(rc.getDeletionsKeySet().length, length);

    for (int i = 0; i < length; i++) {
      // check that read group is always the same
      Assert.assertEquals(rgCov.formatKey(rc.getMismatchesKeySet(i)[0]), RGID);
      Assert.assertEquals(rgCov.formatKey(rc.getInsertionsKeySet(i)[0]), RGID);
      Assert.assertEquals(rgCov.formatKey(rc.getDeletionsKeySet(i)[0]), RGID);

      // check quality score
      Assert.assertEquals(qsCov.formatKey(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]);
      Assert.assertEquals(qsCov.formatKey(rc.getInsertionsKeySet(i)[1]), "" + iQuals[i]);
      Assert.assertEquals(qsCov.formatKey(rc.getDeletionsKeySet(i)[1]), "" + dQuals[i]);

      // check context
      Assert.assertEquals(
          coCov.formatKey(rc.getMismatchesKeySet(i)[2]),
          ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE));
      Assert.assertEquals(
          coCov.formatKey(rc.getInsertionsKeySet(i)[2]),
          ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE));
      Assert.assertEquals(
          coCov.formatKey(rc.getDeletionsKeySet(i)[2]),
          ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE));

      // check cycle
      Assert.assertEquals(cyCov.formatKey(rc.getMismatchesKeySet(i)[3]), "" + (i + 1));
      Assert.assertEquals(cyCov.formatKey(rc.getInsertionsKeySet(i)[3]), "" + (i + 1));
      Assert.assertEquals(cyCov.formatKey(rc.getDeletionsKeySet(i)[3]), "" + (i + 1));
    }
  }
Ejemplo n.º 10
0
 public static GATKSAMRecord createRandomRead(int length, boolean allowNs) {
   byte[] quals = ReadUtils.createRandomReadQuals(length);
   byte[] bbases = ReadUtils.createRandomReadBases(length, allowNs);
   return ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
 }