  /**
   * Update the recalibration statistics using the information in recalInfo
   *
   * @param recalInfo data structure holding information about the recalibration values for a single
   *     read
   */
  @Requires("recalInfo != null")
  public void updateDataForRead(final ReadRecalibrationInfo recalInfo) {
    final GATKSAMRecord read = recalInfo.getRead();
    final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
    final RecalibrationTables tables = getUpdatableRecalibrationTables();
    final NestedIntegerArray<RecalDatum> qualityScoreTable = tables.getQualityScoreTable();

    for (int offset = 0; offset < read.getReadBases().length; offset++) {
      if (!recalInfo.skip(offset)) {

        for (final EventType eventType : EventType.values()) {
          final int[] keys = readCovariates.getKeySet(offset, eventType);
          final int eventIndex = eventType.ordinal();
          final byte qual = recalInfo.getQual(eventType, offset);
          final double isError = recalInfo.getErrorFraction(eventType, offset);

          RecalUtils.incrementDatumOrPutIfNecessary(
              qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);

          for (int i = 2; i < covariates.length; i++) {
            if (keys[i] < 0) continue;

            RecalUtils.incrementDatumOrPutIfNecessary(
                tables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
          }
        }
      }
    }
  }
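
  // To make the update pattern above concrete, here is a minimal, hedged sketch of the
  // "increment the datum, creating it on first touch" behavior that
  // RecalUtils.incrementDatumOrPutIfNecessary provides. SimpleDatum and SimpleRecalTable are
  // hypothetical stand-ins, not GATK classes; only the pattern is the point.
  static final class SimpleDatum {
    long observations;
    double errors;

    void increment(final double isError) {
      observations++;
      errors += isError;
    }
  }

  static final class SimpleRecalTable {
    private final java.util.Map<java.util.List<Integer>, SimpleDatum> table =
        new java.util.HashMap<>();

    /** Increment the datum at the given covariate keys, creating it on first touch. */
    void incrementOrPut(final double isError, final int... keys) {
      final java.util.List<Integer> key = new java.util.ArrayList<>(keys.length);
      for (final int k : keys) key.add(k);
      table.computeIfAbsent(key, unused -> new SimpleDatum()).increment(isError);
    }
  }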
 @Test(expectedExceptions = IllegalStateException.class)
 public void testStrandlessReadsFailSetStrand() {
   final byte[] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
   final byte[] quals = {20, 20, 20, 20, 20, 20, 20, 20};
   GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M");
   read.setIsStrandless(true);
   read.setReadNegativeStrandFlag(true);
 }
 @BeforeClass
 public void init() {
   SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
   read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
   read.setReadUnmappedFlag(true);
   read.setReadBases(BASES.getBytes());
   read.setBaseQualityString(new String(QUALS));
 }
 @Test
 public void realignAtContigBorderTest() {
   final int contigEnd = header.getSequence(0).getSequenceLength();
   final GATKSAMRecord read =
       ArtificialSAMUtils.createArtificialRead(header, "goodRead", 0, contigEnd - 1, 2);
   read.setCigarString("2M");
   Assert.assertEquals(IndelRealigner.realignmentProducesBadAlignment(read, contigEnd), false);
   read.setCigarString("1M1D1M");
   Assert.assertEquals(IndelRealigner.realignmentProducesBadAlignment(read, contigEnd), true);
 }
 private void verifySortednessOfReads(final List<GATKSAMRecord> reads) {
   int lastStart = -1;
   for (GATKSAMRecord read : reads) {
     Assert.assertTrue(
         lastStart <= read.getAlignmentStart(),
         "Reads should be sorted but weren't.  Found read with start "
             + read.getAlignmentStart()
             + " while last was "
             + lastStart);
     lastStart = read.getAlignmentStart();
   }
 }
  @Override
  protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) {
    final int offset =
        ReadUtils.getReadCoordinateForReferenceCoordinate(
            read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true);
    if (offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED) return null;

    int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), offset, false, 0, 0);
    final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips(read);
    if (readPos > numAlignedBases / 2) readPos = numAlignedBases - (readPos + 1);
    return (double) readPos;
  }
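
  // Hedged illustration (not from the source) of the folding step above: positions past the
  // midpoint are reflected so the returned value measures distance from the *nearer* end of the
  // aligned bases. With 10 aligned bases, positions 0..9 fold to 0,1,2,3,4,5,3,2,1,0 (note that
  // the exact midpoint is left unfolded by the > comparison).
  static int foldReadPosition(final int readPos, final int numAlignedBases) {
    return readPos > numAlignedBases / 2 ? numAlignedBases - (readPos + 1) : readPos;
  }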
 /**
  * Shallow copy of everything, except for the attribute list and the temporary attributes. A new
  * list of the attributes is created for both, but the attributes themselves are copied by
  * reference. This should be safe because callers should never modify a mutable value returned by
  * any of the get() methods anyway.
  *
  * @return a shallow copy of the GATKSAMRecord
  */
 @Override
 public Object clone() {
   try {
     final GATKSAMRecord clone = (GATKSAMRecord) super.clone();
     if (temporaryAttributes != null) {
       clone.temporaryAttributes = new HashMap<>();
       for (Object attribute : temporaryAttributes.keySet())
         clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute));
     }
     return clone;
   } catch (final CloneNotSupportedException e) {
     throw new RuntimeException(e);
   }
 }
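
  // A hedged usage sketch of the clone semantics documented above: the temporary-attribute map
  // itself is copied, so adding a key to the clone does not leak into the original. Assumes the
  // getTemporaryAttribute accessor paired with setTemporaryAttribute (used above) returns null
  // for missing keys.
  static void demonstrateCloneIsolation(final GATKSAMRecord original) {
    original.setTemporaryAttribute("shared", 42);
    final GATKSAMRecord copy = (GATKSAMRecord) original.clone();
    copy.setTemporaryAttribute("cloneOnly", true);
    assert copy.getTemporaryAttribute("shared") != null; // entries are carried over
    assert original.getTemporaryAttribute("cloneOnly") == null; // but the maps are independent
  }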
 @Override
 protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) {
   return (double)
       read.getBaseQualities()[
           ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(
               read, refLoc, ReadUtils.ClippingTail.RIGHT_TAIL)];
 }
  private GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
    GATKSAMRecord unclipped = (GATKSAMRecord) read.clone();

    Cigar unclippedCigar = new Cigar();
    int matchesCount = 0;
    for (CigarElement element : read.getCigar().getCigarElements()) {
      if (element.getOperator() == CigarOperator.SOFT_CLIP
          || element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
        matchesCount += element.getLength();
      else if (matchesCount > 0) {
        unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
        matchesCount = 0;
        unclippedCigar.add(element);
      } else unclippedCigar.add(element);
    }
    if (matchesCount > 0)
      unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));

    unclipped.setCigar(unclippedCigar);
    final int newStart =
        read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar);
    unclipped.setAlignmentStart(newStart);

    if (newStart <= 0) {
      // if the start of the unclipped read occurs before the contig,
      // we must hard clip away the bases since we cannot represent reads with
      // negative or 0 alignment start values in the SAMRecord (e.g., 0 means unaligned)
      return hardClip(unclipped, 0, -newStart);
    } else {
      return unclipped;
    }
  }
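
  // Hedged worked example of the cigar rewrite above, using the Cigar types already used in this
  // file: a read whose cigar is 2S6M comes out of revertSoftClippedBases with 8M (the leading
  // soft clip is folded into the adjacent match block), with its alignment start shifted 2 left.
  static Cigar exampleRevertedCigar() {
    final Cigar clipped = new Cigar(); // the input: 2S6M
    clipped.add(new CigarElement(2, CigarOperator.SOFT_CLIP));
    clipped.add(new CigarElement(6, CigarOperator.MATCH_OR_MISMATCH));

    final Cigar reverted = new Cigar(); // what the merge loop above produces: 8M
    reverted.add(new CigarElement(8, CigarOperator.MATCH_OR_MISMATCH));
    return reverted;
  }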
  @Override
  public Integer map(
      ReferenceContext referenceContext,
      GATKSAMRecord read,
      RefMetaDataTracker tracker) {
    final String rgID = read.getReadGroup().getId();
    final PerReadGroupInfo info = readGroupInfo.get(rgID);

    if (info.needsMoreData()) {
      info.readLength.add(read.getReadLength());
      info.nReadsSeen++;
      if (read.getReadPairedFlag()) {
        info.nReadsPaired++;
        if (read.getInferredInsertSize() != 0) {
          info.insertSize.add(Math.abs(read.getInferredInsertSize()));
        }
      }
    }

    return null;
  }
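
  // Hedged sketch of the per-read-group accumulator assumed by map() above; PerReadGroupInfo is
  // not shown in this excerpt, so the field types and the sampling cap here are illustrative
  // stand-ins only.
  static final class PerReadGroupInfoSketch {
    final java.util.List<Integer> readLength = new java.util.ArrayList<>();
    final java.util.List<Integer> insertSize = new java.util.ArrayList<>();
    int nReadsSeen = 0;
    int nReadsPaired = 0;
    private static final int READS_TO_SAMPLE = 10_000; // assumed cap

    boolean needsMoreData() {
      return nReadsSeen < READS_TO_SAMPLE;
    }
  }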
  /** Ensure that splitting read groups still works when dealing with null read groups. */
  @Test
  public void testSplitByNullReadGroups() {
    SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);

    GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, 10);
    GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 1, 10);
    GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header, "read3", 0, 1, 10);

    ReadBackedPileup pileup =
        new ReadBackedPileupImpl(null, Arrays.asList(read1, read2, read3), Arrays.asList(1, 1, 1));

    ReadBackedPileup nullRgPileup = pileup.getPileupForReadGroup(null);
    List<GATKSAMRecord> nullRgReads = nullRgPileup.getReads();
    Assert.assertEquals(
        nullRgPileup.getNumberOfElements(), 3, "Wrong number of reads in null read group");
    Assert.assertEquals(
        nullRgReads.get(0),
        read1,
        "Read " + read1.getReadName() + " should be in null rg but isn't");
    Assert.assertEquals(
        nullRgReads.get(1),
        read2,
        "Read " + read2.getReadName() + " should be in null rg but isn't");
    Assert.assertEquals(
        nullRgReads.get(2),
        read3,
        "Read " + read3.getReadName() + " should be in null rg but isn't");

    ReadBackedPileup rg1Pileup = pileup.getPileupForReadGroup("rg1");
    Assert.assertNull(rg1Pileup, "Pileup for non-existent read group should return null");
  }
  @Test
  public void testGetPileupForSample() {
    String sample1 = "sample1";
    String sample2 = "sample2";

    SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
    readGroupOne.setSample(sample1);
    SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2");
    readGroupTwo.setSample(sample2);

    SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
    header.addReadGroup(readGroupOne);
    header.addReadGroup(readGroupTwo);

    GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, 10);
    read1.setAttribute("RG", readGroupOne.getId());
    GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 1, 10);
    read2.setAttribute("RG", readGroupTwo.getId());

    Map<String, ReadBackedPileupImpl> sampleToPileupMap =
        new HashMap<String, ReadBackedPileupImpl>();
    sampleToPileupMap.put(
        sample1, new ReadBackedPileupImpl(null, Collections.singletonList(read1), 0));
    sampleToPileupMap.put(
        sample2, new ReadBackedPileupImpl(null, Collections.singletonList(read2), 0));

    ReadBackedPileup pileup = new ReadBackedPileupImpl(null, sampleToPileupMap);

    ReadBackedPileup sample2Pileup = pileup.getPileupForSample(sample2);
    Assert.assertEquals(
        sample2Pileup.getNumberOfElements(), 1, "Sample 2 pileup has wrong number of elements");
    Assert.assertEquals(
        sample2Pileup.getReads().get(0), read2, "Sample 2 pileup has incorrect read");

    ReadBackedPileup missingSamplePileup = pileup.getPileupForSample("missing");
    Assert.assertNull(missingSamplePileup, "Pileup for sample 'missing' should be null but isn't");

    missingSamplePileup = pileup.getPileupForSample("not here");
    Assert.assertNull(missingSamplePileup, "Pileup for sample 'not here' should be null but isn't");
  }
  /**
   * Creates a new GATKSAMRecord with the source read's header, read group and mate information, but
   * with the following fields set to user-supplied values:
   *
   * <ul>
   *   <li>Read Bases
   *   <li>Base Qualities
   *   <li>Base Insertion Qualities
   *   <li>Base Deletion Qualities
   * </ul>
   *
   * <p>Cigar string is empty (not-null)
   *
   * <p>Use this method if you want to create a new GATKSAMRecord based on another GATKSAMRecord,
   * but with modified bases and qualities
   *
   * @param read a read to copy the header from
   * @param readBases an array containing the new bases you wish to use in place of the originals
   * @param baseQualities an array containing the new base qualities you wish to use in place of
   *     the originals
   * @param baseInsertionQualities an array containing the new base insertion qualities
   * @param baseDeletionQualities an array containing the new base deletion qualities
   * @return a read with modified bases and qualities, safe for the GATK
   */
  public static GATKSAMRecord createQualityModifiedRead(
      final GATKSAMRecord read,
      final byte[] readBases,
      final byte[] baseQualities,
      final byte[] baseInsertionQualities,
      final byte[] baseDeletionQualities) {
    if (baseQualities.length != readBases.length
        || baseInsertionQualities.length != readBases.length
        || baseDeletionQualities.length != readBases.length)
      throw new IllegalArgumentException(
          "Read bases and read quality arrays aren't the same size: Bases:"
              + readBases.length
              + " vs Base Q's:"
              + baseQualities.length
              + " vs Insert Q's:"
              + baseInsertionQualities.length
              + " vs Delete Q's:"
              + baseDeletionQualities.length);

    final GATKSAMRecord processedRead = GATKSAMRecord.emptyRead(read);
    processedRead.setReadBases(readBases);
    processedRead.setBaseQualities(baseQualities, EventType.BASE_SUBSTITUTION);
    processedRead.setBaseQualities(baseInsertionQualities, EventType.BASE_INSERTION);
    processedRead.setBaseQualities(baseDeletionQualities, EventType.BASE_DELETION);

    return processedRead;
  }
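
  // A hedged usage sketch: keep the read's bases but overwrite all three quality tracks with a
  // flat Q30. Utils.dupBytes is the helper used by the tests elsewhere in this file; all arrays
  // must match the base count or the length check above throws.
  static GATKSAMRecord withFlatQ30(final GATKSAMRecord read) {
    final byte[] bases = read.getReadBases();
    final byte[] flatQuals = Utils.dupBytes((byte) 30, bases.length);
    return createQualityModifiedRead(read, bases, flatQuals, flatQuals, flatQuals);
  }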
  public double[] computeReadHaplotypeLikelihoods(
      ReadBackedPileup pileup, HashMap<Allele, Haplotype> haplotypesInVC) {
    double[][] haplotypeLikelihoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()];
    double[][] readLikelihoods = new double[pileup.getReads().size()][haplotypesInVC.size()];
    int i = 0;
    for (GATKSAMRecord read : pileup.getReads()) {
      if (ReadUtils.is454Read(read)) {
        continue;
      }
      // for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi))
      // = sum_j(-10*log10(Pr(R_j | Hi))) since the read's bases are assumed to be independent
      int j = 0;
      for (Map.Entry<Allele, Haplotype> a : haplotypesInVC.entrySet()) {
        readLikelihoods[i][j] = computeReadLikelihoodGivenHaplotype(a.getValue(), read);
        if (DEBUG) {
          System.out.print(read.getReadName() + " ");

          System.out.format(
              "%d %d S:%d US:%d E:%d UE:%d C:%s %3.4f\n",
              i,
              j,
              read.getAlignmentStart(),
              read.getUnclippedStart(),
              read.getAlignmentEnd(),
              read.getUnclippedEnd(),
              read.getCigarString(),
              readLikelihoods[i][j]);
        }
        j++;
      }
      i++;
    }

    // only the first i rows of readLikelihoods were filled (454 reads are skipped above),
    // so don't let the zeroed trailing rows contribute to the pairwise sums
    final int numReadsUsed = i;

    for (i = 0; i < haplotypesInVC.size(); i++) {
      for (int j = i; j < haplotypesInVC.size(); j++) {
        // combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
        // L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2)
        // readLikelihoods[k][j] has log10(Pr(R_k | H[j]))
        double[] readLikelihood = new double[2]; // diploid sample
        for (int readIdx = 0; readIdx < numReadsUsed; readIdx++) {
          readLikelihood[0] = -readLikelihoods[readIdx][i] / 10;
          readLikelihood[1] = -readLikelihoods[readIdx][j] / 10;

          // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1 + 10^x2) - log10(2)
          // First term is approximated by Jacobian log with table lookup.
          // Second term is a constant added to both likelihoods so will be ignored
          haplotypeLikelihoodMatrix[i][j] +=
              MathUtils.approximateLog10SumLog10(readLikelihood[0], readLikelihood[1]);
        }
      }
    }

    return getHaplotypeLikelihoods(haplotypeLikelihoodMatrix);
  }
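
  // The "Jacobian log" referenced in the comments above, written out exactly as a hedged,
  // dependency-free sketch; GATK's MathUtils.approximateLog10SumLog10 replaces the log10(1 + ...)
  // term with a table lookup for speed.
  static double log10SumLog10(final double x1, final double x2) {
    // log10(10^x1 + 10^x2) = max + log10(1 + 10^(min - max)); factoring out the max avoids
    // underflow when both inputs are strongly negative log10 likelihoods
    final double max = Math.max(x1, x2);
    final double min = Math.min(x1, x2);
    return max + Math.log10(1.0 + Math.pow(10.0, min - max));
  }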
  /**
   * Is this read poorly modelled by all of the alleles in this map?
   *
   * <p>A read is poorly modelled when its likelihood is below what would be expected for a read
   * originating from one of the alleles given the maxErrorRatePerBase of the reads in general.
   *
   * <p>This function makes a number of key assumptions. First, that the likelihoods reflect the
   * total likelihood of the read. In other words, that the read would be fully explained by one of
   * the alleles. This means that the allele should be something like the full haplotype from which
   * the read might originate.
   *
   * <p>It further assumes that each error in the read contributes a log10 likelihood of -4.0 (Q40
   * confidence per base), and that the number of tolerated errors is the read length times
   * maxErrorRatePerBase, rounded up and capped at 2. So for a read to be well modelled by at
   * least one allele we'd expect its likelihood to be >= maxErrorsForRead * -4.0.
   *
   * @param read the read we want to evaluate
   * @param log10Likelihoods a list of the log10 likelihoods of the read against a set of
   *     haplotypes.
   * @param maxErrorRatePerBase the maximum error rate we'd expect for this read per base, in real
   *     space. So 0.01 means a 1% error rate
   * @return true if none of the log10 likelihoods imply that the read truly originated from one of
   *     the haplotypes
   */
  protected boolean readIsPoorlyModelled(
      final GATKSAMRecord read,
      final Collection<Double> log10Likelihoods,
      final double maxErrorRatePerBase) {
    final double maxErrorsForRead =
        Math.min(2.0, Math.ceil(read.getReadLength() * maxErrorRatePerBase));
    final double log10QualPerBase = -4.0;
    final double log10MaxLikelihoodForTrueAllele = maxErrorsForRead * log10QualPerBase;

    for (final double log10Likelihood : log10Likelihoods)
      if (log10Likelihood >= log10MaxLikelihoodForTrueAllele) return false;

    return true;
  }
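
  // Hedged worked example of the threshold above: a 250 bp read with maxErrorRatePerBase = 0.01
  // would tolerate ceil(2.5) = 3 errors, but the Math.min caps this at 2, so the read survives
  // only if its best log10 likelihood is >= 2 * -4.0 = -8.0.
  static double exampleLikelihoodThreshold() {
    final double maxErrorsForRead = Math.min(2.0, Math.ceil(250 * 0.01)); // = 2.0 (capped)
    return maxErrorsForRead * -4.0; // = -8.0
  }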
  /**
   * @param read a read containing the variant
   * @return number of hard clipped and low qual bases at the read end (where end is right end
   *     w.r.t. the reference)
   */
  public static int getNumClippedBasesAtEnd(final GATKSAMRecord read) {
    // check for hard clips (never consider these bases):
    final Cigar c = read.getCigar();
    CigarElement last = c.getCigarElement(c.numCigarElements() - 1);

    int numEndClippedBases = 0;
    if (last.getOperator() == CigarOperator.H) {
      numEndClippedBases = last.getLength();
    }
    final byte[] unclippedReadBases = read.getReadBases();
    final byte[] unclippedReadQuals = read.getBaseQualities();

    // Do a stricter base clipping than provided by the CIGAR string, since that one may be too
    // conservative and may leave a string of Q2 bases still hanging off the read.
    // TODO: this code may not even get used because HaplotypeCaller already hard clips low quality
    // tails
    for (int i = unclippedReadBases.length - numEndClippedBases - 1; i >= 0; i--) {
      if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD) numEndClippedBases++;
      else break;
    }

    return numEndClippedBases;
  }
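
    // Hedged standalone version of the quality-extension loop above: given the CIGAR-reported
    // hard clip count, extend the clip through any trailing run of bases below a threshold (Q10
    // here is an illustrative stand-in for PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD).
    static int extendEndClipThroughLowQuals(
        final byte[] quals, int numEndClippedBases, final byte minQual) {
      for (int i = quals.length - numEndClippedBases - 1; i >= 0; i--) {
        if (quals[i] < minQual) numEndClippedBases++;
        else break;
      }
      return numEndClippedBases; // e.g. {30, 30, 2, 2} with 0 hard clips -> 2
    }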
    private List<PileupElement> makeReads(final int n, final int mapq, final String op) {
      final int readLength = 3;

      final List<PileupElement> elts = new LinkedList<PileupElement>();
      for (int i = 0; i < n; i++) {
        GATKSAMRecord read =
            ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, readLength);
        read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
        read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
        read.setCigarString("1M1" + op + "1M");
        read.setMappingQuality(mapq);
        final int baseOffset = op.equals("M") ? 1 : 0;
        final CigarElement cigarElement = read.getCigar().getCigarElement(1);
        elts.add(new PileupElement(read, baseOffset, cigarElement, 1, 0));
      }

      return elts;
    }
  @Test
  public void testRBPMappingQuals() {

    // create a read with high MQ
    final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, 10);
    read.setReadBases(Utils.dupBytes((byte) 'A', 10));
    read.setBaseQualities(Utils.dupBytes((byte) 30, 10));
    read.setCigarString("10M");
    read.setMappingQuality(200); // set a MQ higher than max signed byte

    // now create the RBP
    final List<PileupElement> elts = new LinkedList<>();
    elts.add(new PileupElement(read, 0, read.getCigar().getCigarElement(0), 0, 0));
    final Map<String, ReadBackedPileupImpl> pileupsBySample = new HashMap<>();
    pileupsBySample.put("foo", new ReadBackedPileupImpl(loc, elts));
    final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, pileupsBySample);

    Assert.assertEquals(pileup.getMappingQuals()[0], 200);
  }
  /**
   * Creates an empty GATKSAMRecord with the read's header, read group and mate information, but
   * empty (not-null) fields:
   *
   * <ul>
   *   <li>Cigar String
   *   <li>Read Bases
   *   <li>Base Qualities
   * </ul>
   *
   * <p>Use this method if you want to create a new empty GATKSAMRecord based on another
   * GATKSAMRecord
   *
   * @param read a read to copy the header from
   * @return a read with no bases but safe for the GATK
   */
  public static GATKSAMRecord emptyRead(GATKSAMRecord read) {
    final GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader());
    emptyRead.setReferenceIndex(read.getReferenceIndex());
    emptyRead.setAlignmentStart(0);
    emptyRead.setMappingQuality(0);
    // note: the read indexing bin is set last, at the end of this method
    emptyRead.setFlags(read.getFlags());
    emptyRead.setMateReferenceIndex(read.getMateReferenceIndex());
    emptyRead.setMateAlignmentStart(read.getMateAlignmentStart());
    emptyRead.setInferredInsertSize(read.getInferredInsertSize());

    emptyRead.setCigarString("");
    emptyRead.setReadBases(new byte[0]);
    emptyRead.setBaseQualities(new byte[0]);

    SAMReadGroupRecord samRG = read.getReadGroup();
    emptyRead.clearAttributes();
    if (samRG != null) {
      GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG);
      emptyRead.setReadGroup(rg);
    }

    GATKBin.setReadIndexingBin(emptyRead, 0);

    return emptyRead;
  }
  /**
   * Clips the bases in read according to this operation's start and stop. The clipping
   * representation used is the one provided by the algorithm argument.
   *
   * @param algorithm clipping algorithm to use
   * @param originalRead the read to be clipped
   */
  public GATKSAMRecord apply(ClippingRepresentation algorithm, GATKSAMRecord originalRead) {
    GATKSAMRecord read = (GATKSAMRecord) originalRead.clone();
    byte[] quals = read.getBaseQualities();
    byte[] bases = read.getReadBases();
    byte[] newBases = new byte[bases.length];
    byte[] newQuals = new byte[quals.length];

    switch (algorithm) {
        // important note:
        //   it's not safe to call read.getReadBases()[i] = 'N' or read.getBaseQualities()[i] = 0
        //   because you're not guaranteed to get a pointer to the actual array of bytes in the
        //   GATKSAMRecord
      case WRITE_NS:
        for (int i = 0; i < bases.length; i++) {
          if (i >= start && i <= stop) {
            newBases[i] = 'N';
          } else {
            newBases[i] = bases[i];
          }
        }
        read.setReadBases(newBases);
        break;
      case WRITE_Q0S:
        for (int i = 0; i < quals.length; i++) {
          if (i >= start && i <= stop) {
            newQuals[i] = 0;
          } else {
            newQuals[i] = quals[i];
          }
        }
        read.setBaseQualities(newQuals);
        break;
      case WRITE_NS_Q0S:
        for (int i = 0; i < bases.length; i++) {
          if (i >= start && i <= stop) {
            newQuals[i] = 0;
            newBases[i] = 'N';
          } else {
            newQuals[i] = quals[i];
            newBases[i] = bases[i];
          }
        }
        read.setBaseQualities(newQuals);
        read.setReadBases(newBases);
        break;
      case HARDCLIP_BASES:
        read = hardClip(read, start, stop);
        break;

      case SOFTCLIP_BASES:
        if (read.getReadUnmappedFlag()) {
          // we can't process unmapped reads
          throw new UserException("Read Clipper cannot soft clip unmapped reads");
        }

        // System.out.printf("%d %d %d%n", stop, start, read.getReadLength());
        int myStop = stop;
        if ((stop + 1 - start) == read.getReadLength()) {
          // BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it
          // alone
          // Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be
          // represented with SOFTCLIP_BASES, just leaving it alone", read.getReadName()));
          // break;
          myStop--; // just decrement stop
        }

        if (start > 0 && myStop != read.getReadLength() - 1)
          throw new RuntimeException(
              String.format(
                  "Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d",
                  read.getReadName(), start, myStop));

        Cigar oldCigar = read.getCigar();

        int scLeft = 0, scRight = read.getReadLength();
        if (start == 0) scLeft = myStop + 1;
        else scRight = start;

        Cigar newCigar = softClip(oldCigar, scLeft, scRight);
        read.setCigar(newCigar);

        int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar);
        int newStart = read.getAlignmentStart() + newClippedStart;
        read.setAlignmentStart(newStart);

        break;

      case REVERT_SOFTCLIPPED_BASES:
        read = revertSoftClippedBases(read);
        break;

      default:
        throw new IllegalStateException("Unexpected Clipping operator type " + algorithm);
    }

    return read;
  }
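
  // Hedged standalone sketch of the WRITE_NS branch above: copy the base array, then mask the
  // inclusive [start, stop] window, never writing through the array returned by getReadBases()
  // (per the note in the switch, that array is not guaranteed to be the record's live storage).
  static byte[] maskWindowWithNs(final byte[] bases, final int start, final int stop) {
    final byte[] masked = bases.clone();
    for (int i = Math.max(start, 0); i <= stop && i < masked.length; i++) masked[i] = 'N';
    return masked; // maskWindowWithNs("ACGTACGT".getBytes(), 2, 4) -> "ACNNNCGT"
  }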
  /**
   * Hard clip bases from read, from start to stop in base coordinates
   *
   * <p>If start == 0, then we will clip from the front of the read, otherwise we clip from the
   * right. Both bounds are inclusive: if start == 0 and stop == 10, this would clip out the first
   * 11 bases of the read.
   *
   * <p>Note that this function works with reads with negative alignment starts, in order to allow
   * us to hardClip reads that have had their soft clips reverted and so might have negative
   * alignment starts
   *
   * <p>Works properly with reduced reads and insertion/deletion base qualities
   *
   * @param read a non-null read
   * @param start a start >= 0 and < read.length
   * @param stop a stop >= 0 and < read.length.
   * @return a cloned version of read that has been properly trimmed down
   */
  private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) {

    // if the read is unmapped there is no cigar string, so we should not create a new one either
    final CigarShift cigarShift =
        (read.getReadUnmappedFlag())
            ? new CigarShift(new Cigar(), 0, 0)
            : hardClipCigar(read.getCigar(), start, stop);

    // the cigar may force a shift left or right (or both) in case we are left with insertions
    // starting or ending the read after applying the hard clip on start/stop.
    final int newLength =
        read.getReadLength()
            - (stop - start + 1)
            - cigarShift.shiftFromStart
            - cigarShift.shiftFromEnd;
    final byte[] newBases = new byte[newLength];
    final byte[] newQuals = new byte[newLength];
    final int copyStart =
        (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;

    System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
    System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);

    final GATKSAMRecord hardClippedRead = (GATKSAMRecord) read.clone();

    // reset the cached soft start and end because they may have changed now that the read has
    // been hard clipped; no need to calculate them now, they'll be lazily recalculated on the
    // next call to getSoftStart()/getSoftEnd()
    hardClippedRead.resetSoftStartAndEnd();
    hardClippedRead.setBaseQualities(newQuals);
    hardClippedRead.setReadBases(newBases);
    hardClippedRead.setCigar(cigarShift.cigar);
    if (start == 0)
      hardClippedRead.setAlignmentStart(
          read.getAlignmentStart()
              + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));

    if (read.hasBaseIndelQualities()) {
      final byte[] newBaseInsertionQuals = new byte[newLength];
      final byte[] newBaseDeletionQuals = new byte[newLength];
      System.arraycopy(
          read.getBaseInsertionQualities(), copyStart, newBaseInsertionQuals, 0, newLength);
      System.arraycopy(
          read.getBaseDeletionQualities(), copyStart, newBaseDeletionQuals, 0, newLength);
      hardClippedRead.setBaseQualities(newBaseInsertionQuals, EventType.BASE_INSERTION);
      hardClippedRead.setBaseQualities(newBaseDeletionQuals, EventType.BASE_DELETION);
    }

    return hardClippedRead;
  }
 // TODO: this is bizarre -- this code counts hard clips, but then subtracts them from the read
 // length, which already doesn't count hard clips
 public static int getNumAlignedBases(final GATKSAMRecord read) {
   return read.getReadLength() - getNumClippedBasesAtStart(read) - getNumClippedBasesAtEnd(read);
 }
  /** Ensure that basic read group splitting works. */
  @Test
  public void testSplitByReadGroup() {
    SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
    SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2");

    SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
    header.addReadGroup(readGroupOne);
    header.addReadGroup(readGroupTwo);

    GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, 10);
    read1.setAttribute("RG", readGroupOne.getId());
    GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 1, 10);
    read2.setAttribute("RG", readGroupTwo.getId());
    GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header, "read3", 0, 1, 10);
    read3.setAttribute("RG", readGroupOne.getId());
    GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header, "read4", 0, 1, 10);
    read4.setAttribute("RG", readGroupTwo.getId());
    GATKSAMRecord read5 = ArtificialSAMUtils.createArtificialRead(header, "read5", 0, 1, 10);
    read5.setAttribute("RG", readGroupTwo.getId());
    GATKSAMRecord read6 = ArtificialSAMUtils.createArtificialRead(header, "read6", 0, 1, 10);
    read6.setAttribute("RG", readGroupOne.getId());
    GATKSAMRecord read7 = ArtificialSAMUtils.createArtificialRead(header, "read7", 0, 1, 10);
    read7.setAttribute("RG", readGroupOne.getId());

    ReadBackedPileup pileup =
        new ReadBackedPileupImpl(
            null,
            Arrays.asList(read1, read2, read3, read4, read5, read6, read7),
            Arrays.asList(1, 1, 1, 1, 1, 1, 1));

    ReadBackedPileup rg1Pileup = pileup.getPileupForReadGroup("rg1");
    List<GATKSAMRecord> rg1Reads = rg1Pileup.getReads();
    Assert.assertEquals(rg1Reads.size(), 4, "Wrong number of reads in read group rg1");
    Assert.assertEquals(
        rg1Reads.get(0), read1, "Read " + read1.getReadName() + " should be in rg1 but isn't");
    Assert.assertEquals(
        rg1Reads.get(1), read3, "Read " + read3.getReadName() + " should be in rg1 but isn't");
    Assert.assertEquals(
        rg1Reads.get(2), read6, "Read " + read6.getReadName() + " should be in rg1 but isn't");
    Assert.assertEquals(
        rg1Reads.get(3), read7, "Read " + read7.getReadName() + " should be in rg1 but isn't");

    ReadBackedPileup rg2Pileup = pileup.getPileupForReadGroup("rg2");
    List<GATKSAMRecord> rg2Reads = rg2Pileup.getReads();
    Assert.assertEquals(rg2Reads.size(), 3, "Wrong number of reads in read group rg2");
    Assert.assertEquals(
        rg2Reads.get(0), read2, "Read " + read2.getReadName() + " should be in rg2 but isn't");
    Assert.assertEquals(
        rg2Reads.get(1), read4, "Read " + read4.getReadName() + " should be in rg2 but isn't");
    Assert.assertEquals(
        rg2Reads.get(2), read5, "Read " + read5.getReadName() + " should be in rg2 but isn't");
  }
  @Test
  public void testStrandlessReads() {
    final byte[] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
    final byte[] quals = {20, 20, 20, 20, 20, 20, 20, 20};
    GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M");
    Assert.assertEquals(read.isStrandless(), false);

    read.setReadNegativeStrandFlag(false);
    Assert.assertEquals(read.isStrandless(), false);
    Assert.assertEquals(read.getReadNegativeStrandFlag(), false);

    read.setReadNegativeStrandFlag(true);
    Assert.assertEquals(read.isStrandless(), false);
    Assert.assertEquals(read.getReadNegativeStrandFlag(), true);

    read.setReadNegativeStrandFlag(true);
    read.setIsStrandless(true);
    Assert.assertEquals(read.isStrandless(), true);
    Assert.assertEquals(
        read.getReadNegativeStrandFlag(),
        false,
        "negative strand flag should return false even though it's set for a strandless read");
  }
  /** Ensure that splitting read groups still works when dealing with a sample-split pileup. */
  @Test
  public void testSplitBySample() {
    SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
    readGroupOne.setSample("sample1");
    SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2");
    readGroupTwo.setSample("sample2");

    SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
    header.addReadGroup(readGroupOne);
    header.addReadGroup(readGroupTwo);

    GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, 10);
    read1.setAttribute("RG", readGroupOne.getId());
    GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 1, 10);
    read2.setAttribute("RG", readGroupTwo.getId());
    GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header, "read3", 0, 1, 10);
    read3.setAttribute("RG", readGroupOne.getId());
    GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header, "read4", 0, 1, 10);
    read4.setAttribute("RG", readGroupTwo.getId());

    ReadBackedPileupImpl sample1Pileup =
        new ReadBackedPileupImpl(null, Arrays.asList(read1, read3), Arrays.asList(1, 1));
    ReadBackedPileupImpl sample2Pileup =
        new ReadBackedPileupImpl(null, Arrays.asList(read2, read4), Arrays.asList(1, 1));
    Map<String, ReadBackedPileupImpl> sampleToPileupMap =
        new HashMap<String, ReadBackedPileupImpl>();
    sampleToPileupMap.put(readGroupOne.getSample(), sample1Pileup);
    sampleToPileupMap.put(readGroupTwo.getSample(), sample2Pileup);

    ReadBackedPileup compositePileup = new ReadBackedPileupImpl(null, sampleToPileupMap);

    ReadBackedPileup rg1Pileup = compositePileup.getPileupForReadGroup("rg1");
    List<GATKSAMRecord> rg1Reads = rg1Pileup.getReads();

    Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg1");
    Assert.assertEquals(
        rg1Reads.get(0), read1, "Read " + read1.getReadName() + " should be in rg1 but isn't");
    Assert.assertEquals(
        rg1Reads.get(1), read3, "Read " + read3.getReadName() + " should be in rg1 but isn't");

    ReadBackedPileup rg2Pileup = compositePileup.getPileupForReadGroup("rg2");
    List<GATKSAMRecord> rg2Reads = rg2Pileup.getReads();

    Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg2");
    Assert.assertEquals(
        rg2Reads.get(0), read2, "Read " + read2.getReadName() + " should be in rg2 but isn't");
    Assert.assertEquals(
        rg2Reads.get(1), read4, "Read " + read4.getReadName() + " should be in rg2 but isn't");
  }
 @Override
 protected boolean isUsableRead(final GATKSAMRecord read, final int refLoc) {
   return super.isUsableRead(read, refLoc)
       && read.getSoftStart() + read.getCigar().getReadLength() > refLoc;
 }
 @Override
 public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
   return !(read.getReadFailsVendorQualityCheckFlag() || read.getReadUnmappedFlag());
 }