/**
 * Update the recalibration statistics using the information in recalInfo
 *
 * @param recalInfo data structure holding information about the recalibration values for a single
 *     read
 */
@Requires("recalInfo != null")
public void updateDataForRead(final ReadRecalibrationInfo recalInfo) {
  final GATKSAMRecord read = recalInfo.getRead();
  final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
  final RecalibrationTables tables = getUpdatableRecalibrationTables();
  final NestedIntegerArray<RecalDatum> qualityScoreTable = tables.getQualityScoreTable();

  for (int offset = 0; offset < read.getReadBases().length; offset++) {
    if (!recalInfo.skip(offset)) {
      for (final EventType eventType : EventType.values()) {
        final int[] keys = readCovariates.getKeySet(offset, eventType);
        final int eventIndex = eventType.ordinal();
        final byte qual = recalInfo.getQual(eventType, offset);
        final double isError = recalInfo.getErrorFraction(eventType, offset);

        RecalUtils.incrementDatumOrPutIfNecessary(
            qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);

        for (int i = 2; i < covariates.length; i++) {
          if (keys[i] < 0) continue;
          RecalUtils.incrementDatumOrPutIfNecessary(
              tables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
        }
      }
    }
  }
}
@Test(expectedExceptions = IllegalStateException.class)
public void testStrandlessReadsFailSetStrand() {
  final byte[] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
  final byte[] quals = {20, 20, 20, 20, 20, 20, 20, 20};
  // cigar must cover all 8 bases
  GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "8M");
  read.setIsStrandless(true);
  read.setReadNegativeStrandFlag(true);
}
@BeforeClass
public void init() {
  SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
  read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
  read.setReadUnmappedFlag(true);
  read.setReadBases(new String(BASES).getBytes());
  read.setBaseQualityString(new String(QUALS));
}
@Test
public void realignAtContigBorderTest() {
  final int contigEnd = header.getSequence(0).getSequenceLength();
  final GATKSAMRecord read =
      ArtificialSAMUtils.createArtificialRead(header, "goodRead", 0, contigEnd - 1, 2);
  read.setCigarString("2M");
  Assert.assertEquals(IndelRealigner.realignmentProducesBadAlignment(read, contigEnd), false);
  read.setCigarString("1M1D1M");
  Assert.assertEquals(IndelRealigner.realignmentProducesBadAlignment(read, contigEnd), true);
}
private void verifySortednessOfReads(final List<GATKSAMRecord> reads) {
  int lastStart = -1;
  for (GATKSAMRecord read : reads) {
    Assert.assertTrue(
        lastStart <= read.getAlignmentStart(),
        "Reads should be sorted but weren't. Found read with start "
            + read.getAlignmentStart() + " while last was " + lastStart);
    lastStart = read.getAlignmentStart();
  }
}
@Override
protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) {
  final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate(
      read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true);
  if (offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED) return null;

  int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), offset, false, 0, 0);
  final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips(read);
  if (readPos > numAlignedBases / 2) readPos = numAlignedBases - (readPos + 1);
  return (double) readPos;
}
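// The fold at the end of getElementForRead() maps positions in the right half of the alignment
// onto distances from the right end, so the annotation measures distance from the nearest read
// end. A minimal standalone sketch of just that step -- the class and method names below are
// illustrative, not GATK API:
public class ReadPosFoldExample {
  static int distanceFromNearestEnd(int readPos, int numAlignedBases) {
    // mirrors: if (readPos > numAlignedBases / 2) readPos = numAlignedBases - (readPos + 1);
    return (readPos > numAlignedBases / 2) ? numAlignedBases - (readPos + 1) : readPos;
  }

  public static void main(String[] args) {
    System.out.println(distanceFromNearestEnd(80, 100)); // prints 19: 19 bases from the right end
    System.out.println(distanceFromNearestEnd(19, 100)); // prints 19: 19 bases from the left end
  }
}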
/**
 * Shallow copy of everything, except for the attribute list and the temporary attributes. A new
 * list of the attributes is created for both, but the attributes themselves are copied by
 * reference. This should be safe because callers should never modify a mutable value returned by
 * any of the get() methods anyway.
 *
 * @return a shallow copy of the GATKSAMRecord
 */
@Override
public Object clone() {
  try {
    final GATKSAMRecord clone = (GATKSAMRecord) super.clone();
    if (temporaryAttributes != null) {
      clone.temporaryAttributes = new HashMap<>();
      for (Object attribute : temporaryAttributes.keySet())
        clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute));
    }
    return clone;
  } catch (final CloneNotSupportedException e) {
    throw new RuntimeException(e);
  }
}
@Override
protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) {
  final int offset = ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(
      read, refLoc, ReadUtils.ClippingTail.RIGHT_TAIL);
  return (double) read.getBaseQualities()[offset];
}
private GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
  GATKSAMRecord unclipped = (GATKSAMRecord) read.clone();

  Cigar unclippedCigar = new Cigar();
  int matchesCount = 0;
  for (CigarElement element : read.getCigar().getCigarElements()) {
    if (element.getOperator() == CigarOperator.SOFT_CLIP
        || element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
      matchesCount += element.getLength();
    else if (matchesCount > 0) {
      unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
      matchesCount = 0;
      unclippedCigar.add(element);
    } else
      unclippedCigar.add(element);
  }
  if (matchesCount > 0)
    unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));

  unclipped.setCigar(unclippedCigar);
  final int newStart =
      read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar);
  unclipped.setAlignmentStart(newStart);

  if (newStart <= 0) {
    // if the start of the unclipped read occurs before the contig, we must hard clip away the
    // bases since we cannot represent reads with negative or 0 alignment start values in the
    // SAMRecord (e.g., 0 means unaligned)
    return hardClip(unclipped, 0, -newStart);
  } else {
    return unclipped;
  }
}
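// Reverting a leading soft clip of length S moves the alignment start left by S; when that pushes
// the start to 0 or below, revertSoftClippedBases() hard clips the overhang instead, because a
// SAMRecord cannot represent alignment starts <= 0. A standalone sketch of the shift arithmetic
// for the leading-soft-clip case only (this helper is illustrative -- the real shift is computed
// by the GATK-internal calculateAlignmentStartShift over both cigars):
public class RevertSoftClipShiftExample {
  static int newStartAfterRevert(int alignmentStart, int leadingSoftClipLength) {
    return alignmentStart - leadingSoftClipLength;
  }

  public static void main(String[] args) {
    // read aligned at position 3 with CIGAR 5S10M: reverting gives start 3 - 5 = -2, so the
    // method above calls hardClip(unclipped, 0, 2), removing bases 0..2 and leaving the read
    // to start at position 1
    System.out.println(newStartAfterRevert(3, 5)); // prints -2
  }
}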
@Override
public Integer map(
    ReferenceContext referenceContext, GATKSAMRecord read, RefMetaDataTracker tracker) {
  final String rgID = read.getReadGroup().getId();
  final PerReadGroupInfo info = readGroupInfo.get(rgID);
  if (info.needsMoreData()) {
    info.readLength.add(read.getReadLength());
    info.nReadsSeen++;
    if (read.getReadPairedFlag()) {
      info.nReadsPaired++;
      if (read.getInferredInsertSize() != 0) {
        info.insertSize.add(Math.abs(read.getInferredInsertSize()));
      }
    }
  }
  return null;
}
/** Ensure that splitting read groups still works when dealing with null read groups. */
@Test
public void testSplitByNullReadGroups() {
  SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
  GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, 10);
  GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 1, 10);
  GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header, "read3", 0, 1, 10);

  ReadBackedPileup pileup =
      new ReadBackedPileupImpl(null, Arrays.asList(read1, read2, read3), Arrays.asList(1, 1, 1));

  ReadBackedPileup nullRgPileup = pileup.getPileupForReadGroup(null);
  List<GATKSAMRecord> nullRgReads = nullRgPileup.getReads();
  Assert.assertEquals(
      nullRgPileup.getNumberOfElements(), 3, "Wrong number of reads in null read group");
  Assert.assertEquals(
      nullRgReads.get(0), read1, "Read " + read1.getReadName() + " should be in null rg but isn't");
  Assert.assertEquals(
      nullRgReads.get(1), read2, "Read " + read2.getReadName() + " should be in null rg but isn't");
  Assert.assertEquals(
      nullRgReads.get(2), read3, "Read " + read3.getReadName() + " should be in null rg but isn't");

  ReadBackedPileup rg1Pileup = pileup.getPileupForReadGroup("rg1");
  Assert.assertNull(rg1Pileup, "Pileup for non-existent read group should return null");
}
@Test
public void testGetPileupForSample() {
  String sample1 = "sample1";
  String sample2 = "sample2";

  SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
  readGroupOne.setSample(sample1);
  SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2");
  readGroupTwo.setSample(sample2);

  SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
  header.addReadGroup(readGroupOne);
  header.addReadGroup(readGroupTwo);

  GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, 10);
  read1.setAttribute("RG", readGroupOne.getId());
  GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 1, 10);
  read2.setAttribute("RG", readGroupTwo.getId());

  Map<String, ReadBackedPileupImpl> sampleToPileupMap =
      new HashMap<String, ReadBackedPileupImpl>();
  sampleToPileupMap.put(sample1, new ReadBackedPileupImpl(null, Collections.singletonList(read1), 0));
  sampleToPileupMap.put(sample2, new ReadBackedPileupImpl(null, Collections.singletonList(read2), 0));

  ReadBackedPileup pileup = new ReadBackedPileupImpl(null, sampleToPileupMap);

  ReadBackedPileup sample2Pileup = pileup.getPileupForSample(sample2);
  Assert.assertEquals(
      sample2Pileup.getNumberOfElements(), 1, "Sample 2 pileup has wrong number of elements");
  Assert.assertEquals(
      sample2Pileup.getReads().get(0), read2, "Sample 2 pileup has incorrect read");

  ReadBackedPileup missingSamplePileup = pileup.getPileupForSample("missing");
  Assert.assertNull(missingSamplePileup, "Pileup for sample 'missing' should be null but isn't");

  missingSamplePileup = pileup.getPileupForSample("not here");
  Assert.assertNull(missingSamplePileup, "Pileup for sample 'not here' should be null but isn't");
}
/**
 * Creates a new GATKSAMRecord with the source read's header, read group and mate information, but
 * with the following fields set to user-supplied values:
 *  - Read Bases
 *  - Base Qualities
 *  - Base Insertion Qualities
 *  - Base Deletion Qualities
 *
 * <p>Cigar string is empty (not-null)
 *
 * <p>Use this method if you want to create a new GATKSAMRecord based on another GATKSAMRecord,
 * but with modified bases and qualities
 *
 * @param read a read to copy the header from
 * @param readBases an array containing the new bases you wish to use in place of the originals
 * @param baseQualities an array containing the new base qualities you wish to use in place of the
 *     originals
 * @param baseInsertionQualities an array containing the new base insertion qualities
 * @param baseDeletionQualities an array containing the new base deletion qualities
 * @return a read with modified bases and qualities, safe for the GATK
 */
public static GATKSAMRecord createQualityModifiedRead(
    final GATKSAMRecord read,
    final byte[] readBases,
    final byte[] baseQualities,
    final byte[] baseInsertionQualities,
    final byte[] baseDeletionQualities) {
  if (baseQualities.length != readBases.length
      || baseInsertionQualities.length != readBases.length
      || baseDeletionQualities.length != readBases.length)
    throw new IllegalArgumentException(
        "Read bases and read quality arrays aren't the same size: Bases:" + readBases.length
            + " vs Base Q's:" + baseQualities.length
            + " vs Insert Q's:" + baseInsertionQualities.length
            + " vs Delete Q's:" + baseDeletionQualities.length);

  final GATKSAMRecord processedRead = GATKSAMRecord.emptyRead(read);
  processedRead.setReadBases(readBases);
  processedRead.setBaseQualities(baseQualities, EventType.BASE_SUBSTITUTION);
  processedRead.setBaseQualities(baseInsertionQualities, EventType.BASE_INSERTION);
  processedRead.setBaseQualities(baseDeletionQualities, EventType.BASE_DELETION);
  return processedRead;
}
public double[] computeReadHaplotypeLikelihoods(
    ReadBackedPileup pileup, HashMap<Allele, Haplotype> haplotypesInVC) {
  double[][] haplotypeLikelihoodMatrix =
      new double[haplotypesInVC.size()][haplotypesInVC.size()];
  double[][] readLikelihoods = new double[pileup.getReads().size()][haplotypesInVC.size()];
  int i = 0;
  for (GATKSAMRecord read : pileup.getReads()) {
    if (ReadUtils.is454Read(read)) {
      continue;
    }
    // for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi))
    // = sum_j(-10*log10(Pr(R_j | Hi))) since reads are assumed to be independent
    int j = 0;
    for (Map.Entry<Allele, Haplotype> a : haplotypesInVC.entrySet()) {
      readLikelihoods[i][j] = computeReadLikelihoodGivenHaplotype(a.getValue(), read);
      if (DEBUG) {
        System.out.print(read.getReadName() + " ");
        System.out.format(
            "%d %d S:%d US:%d E:%d UE:%d C:%s %3.4f\n",
            i, j,
            read.getAlignmentStart(), read.getUnclippedStart(),
            read.getAlignmentEnd(), read.getUnclippedEnd(),
            read.getCigarString(), readLikelihoods[i][j]);
      }
      j++;
    }
    i++;
  }

  for (i = 0; i < haplotypesInVC.size(); i++) {
    for (int j = i; j < haplotypesInVC.size(); j++) {
      // combine the per-read likelihoods for the haplotype pair (Hi, Hj):
      // L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2 )
      // readLikelihoods[k][j] holds -10*log10(Pr(R_k | H[j])), so dividing by -10 below
      // recovers log10(Pr(R_k | H[j]))
      double[] readLikelihood = new double[2]; // diploid sample
      for (int readIdx = 0; readIdx < pileup.getReads().size(); readIdx++) {
        readLikelihood[0] = -readLikelihoods[readIdx][i] / 10;
        readLikelihood[1] = -readLikelihoods[readIdx][j] / 10;

        // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1 + 10^x2) - log10(2).
        // The first term is approximated by a Jacobian log with table lookup.
        // The second term is a constant added to all likelihoods, so it is ignored.
        haplotypeLikelihoodMatrix[i][j] +=
            MathUtils.approximateLog10SumLog10(readLikelihood[0], readLikelihood[1]);
      }
    }
  }
  return getHaplotypeLikelihoods(haplotypeLikelihoodMatrix);
}
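// Standalone check (illustrative, not GATK code) of the identity used in the inner loop above:
// log10(10^x1/2 + 10^x2/2) = log10(10^x1 + 10^x2) - log10(2). GATK approximates the first term
// with a Jacobian-log table lookup via MathUtils.approximateLog10SumLog10; this exact version
// shows what is being approximated.
public class Log10SumExample {
  static double exactLog10SumLog10(double x1, double x2) {
    // factor out the larger exponent for numerical stability
    final double max = Math.max(x1, x2);
    final double min = Math.min(x1, x2);
    return max + Math.log10(1.0 + Math.pow(10.0, min - max));
  }

  public static void main(String[] args) {
    final double x1 = -3.0, x2 = -5.0;
    final double lhs = Math.log10(Math.pow(10, x1) / 2 + Math.pow(10, x2) / 2);
    final double rhs = exactLog10SumLog10(x1, x2) - Math.log10(2.0);
    System.out.printf("%.6f == %.6f%n", lhs, rhs); // both print -3.296709
  }
}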
/**
 * Is this read poorly modelled by all of the alleles in this map?
 *
 * <p>A read is poorly modelled when its likelihood is below what would be expected for a read
 * originating from one of the alleles given the maxErrorRatePerBase of the reads in general.
 *
 * <p>This function makes a number of key assumptions. First, that the likelihoods reflect the
 * total likelihood of the read. In other words, that the read would be fully explained by one of
 * the alleles. This means that the allele should be something like the full haplotype from which
 * the read might originate.
 *
 * <p>It further assumes that each error in the read costs a log10 likelihood of -4.0 (Q40
 * confidence per base), and it caps the expected number of errors at 2. So for a 100 bp read with
 * a 1% maxErrorRatePerBase we'd expect 1 real error even against the true haplotype, and for this
 * read to be well modelled by at least one allele we'd expect a likelihood >= 1 * -4.0.
 *
 * @param read the read we want to evaluate
 * @param log10Likelihoods a list of the log10 likelihoods of the read against a set of
 *     haplotypes.
 * @param maxErrorRatePerBase the maximum error rate we'd expect for this read per base, in real
 *     space. So 0.01 means a 1% error rate
 * @return true if none of the log10 likelihoods imply that the read truly originated from one of
 *     the haplotypes
 */
protected boolean readIsPoorlyModelled(
    final GATKSAMRecord read,
    final Collection<Double> log10Likelihoods,
    final double maxErrorRatePerBase) {
  final double maxErrorsForRead =
      Math.min(2.0, Math.ceil(read.getReadLength() * maxErrorRatePerBase));
  final double log10QualPerBase = -4.0;
  final double log10MaxLikelihoodForTrueAllele = maxErrorsForRead * log10QualPerBase;

  for (final double log10Likelihood : log10Likelihoods)
    if (log10Likelihood >= log10MaxLikelihoodForTrueAllele) return false;
  return true;
}
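// Worked numeric example (standalone, illustrative) of the cutoff readIsPoorlyModelled()
// computes: a 100 bp read with a 1% max error rate is allowed one expected error, so its best
// log10 likelihood must reach 1 * -4.0 = -4.0 to count as well modelled.
public class PoorlyModelledCutoffExample {
  public static void main(String[] args) {
    final int readLength = 100;
    final double maxErrorRatePerBase = 0.01;
    final double maxErrorsForRead =
        Math.min(2.0, Math.ceil(readLength * maxErrorRatePerBase)); // 1.0 (capped at 2.0)
    final double cutoff = maxErrorsForRead * -4.0;                  // -4.0
    System.out.println(-3.5 >= cutoff);  // true  -> read is well modelled by this haplotype
    System.out.println(-10.0 >= cutoff); // false -> poorly modelled if no haplotype does better
  }
}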
/**
 * @param read a read containing the variant
 * @return number of hard clipped and low qual bases at the read end (where end is right end
 *     w.r.t. the reference)
 */
public static int getNumClippedBasesAtEnd(final GATKSAMRecord read) {
  // check for hard clips (never consider these bases):
  final Cigar c = read.getCigar();
  CigarElement last = c.getCigarElement(c.numCigarElements() - 1);

  int numEndClippedBases = 0;
  if (last.getOperator() == CigarOperator.H) {
    numEndClippedBases = last.getLength();
  }
  final byte[] unclippedReadBases = read.getReadBases();
  final byte[] unclippedReadQuals = read.getBaseQualities();

  // Do a stricter base clipping than provided by the CIGAR string, since that one may be too
  // conservative and may leave a string of Q2 bases still hanging off the read.
  // TODO: this code may not even get used because HaplotypeCaller already hard clips low quality
  // tails
  for (int i = unclippedReadBases.length - numEndClippedBases - 1; i >= 0; i--) {
    if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD) numEndClippedBases++;
    else break;
  }

  return numEndClippedBases;
}
private List<PileupElement> makeReads(final int n, final int mapq, final String op) {
  final int readLength = 3;

  final List<PileupElement> elts = new LinkedList<PileupElement>();
  for (int i = 0; i < n; i++) {
    GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, readLength);
    read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
    read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read.setCigarString("1M1" + op + "1M");
    read.setMappingQuality(mapq);
    final int baseOffset = op.equals("M") ? 1 : 0;
    final CigarElement cigarElement = read.getCigar().getCigarElement(1);
    elts.add(new PileupElement(read, baseOffset, cigarElement, 1, 0));
  }

  return elts;
}
@Test
public void testRBPMappingQuals() {
  // create a read with high MQ
  final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, 10);
  read.setReadBases(Utils.dupBytes((byte) 'A', 10));
  read.setBaseQualities(Utils.dupBytes((byte) 30, 10));
  read.setCigarString("10M");
  read.setMappingQuality(200); // set a MQ higher than the max signed byte

  // now create the RBP
  final List<PileupElement> elts = new LinkedList<>();
  elts.add(new PileupElement(read, 0, read.getCigar().getCigarElement(0), 0, 0));
  final Map<String, ReadBackedPileupImpl> pileupsBySample = new HashMap<>();
  pileupsBySample.put("foo", new ReadBackedPileupImpl(loc, elts));

  final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, pileupsBySample);
  Assert.assertEquals(pileup.getMappingQuals()[0], 200);
}
/**
 * Creates an empty GATKSAMRecord with the read's header, read group and mate information, but
 * empty (not-null) fields:
 *  - Cigar String
 *  - Read Bases
 *  - Base Qualities
 *
 * <p>Use this method if you want to create a new empty GATKSAMRecord based on another
 * GATKSAMRecord
 *
 * @param read a read to copy the header from
 * @return a read with no bases but safe for the GATK
 */
public static GATKSAMRecord emptyRead(GATKSAMRecord read) {
  final GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader());
  emptyRead.setReferenceIndex(read.getReferenceIndex());
  emptyRead.setAlignmentStart(0);
  emptyRead.setMappingQuality(0);
  // setting read indexing bin last
  emptyRead.setFlags(read.getFlags());
  emptyRead.setMateReferenceIndex(read.getMateReferenceIndex());
  emptyRead.setMateAlignmentStart(read.getMateAlignmentStart());
  emptyRead.setInferredInsertSize(read.getInferredInsertSize());
  emptyRead.setCigarString("");
  emptyRead.setReadBases(new byte[0]);
  emptyRead.setBaseQualities(new byte[0]);

  SAMReadGroupRecord samRG = read.getReadGroup();
  emptyRead.clearAttributes();
  if (samRG != null) {
    GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG);
    emptyRead.setReadGroup(rg);
  }

  GATKBin.setReadIndexingBin(emptyRead, 0);

  return emptyRead;
}
/**
 * Clips the bases in read according to this operation's start and stop. The clipping
 * representation used is the one provided by the algorithm argument.
 *
 * @param algorithm clipping algorithm to use
 * @param originalRead the read to be clipped
 */
public GATKSAMRecord apply(ClippingRepresentation algorithm, GATKSAMRecord originalRead) {
  GATKSAMRecord read = (GATKSAMRecord) originalRead.clone();
  byte[] quals = read.getBaseQualities();
  byte[] bases = read.getReadBases();
  byte[] newBases = new byte[bases.length];
  byte[] newQuals = new byte[quals.length];

  switch (algorithm) {
      // important note:
      // it's not safe to call read.getReadBases()[i] = 'N' or read.getBaseQualities()[i] = 0
      // because you're not guaranteed to get a pointer to the actual array of bytes in the
      // GATKSAMRecord
    case WRITE_NS:
      for (int i = 0; i < bases.length; i++) {
        if (i >= start && i <= stop) {
          newBases[i] = 'N';
        } else {
          newBases[i] = bases[i];
        }
      }
      read.setReadBases(newBases);
      break;

    case WRITE_Q0S:
      for (int i = 0; i < quals.length; i++) {
        if (i >= start && i <= stop) {
          newQuals[i] = 0;
        } else {
          newQuals[i] = quals[i];
        }
      }
      read.setBaseQualities(newQuals);
      break;

    case WRITE_NS_Q0S:
      for (int i = 0; i < bases.length; i++) {
        if (i >= start && i <= stop) {
          newQuals[i] = 0;
          newBases[i] = 'N';
        } else {
          newQuals[i] = quals[i];
          newBases[i] = bases[i];
        }
      }
      read.setBaseQualities(newQuals); // was setBaseQualities(newBases), a copy/paste bug
      read.setReadBases(newBases);
      break;

    case HARDCLIP_BASES:
      read = hardClip(read, start, stop);
      break;

    case SOFTCLIP_BASES:
      if (read.getReadUnmappedFlag()) {
        // we can't process unmapped reads
        throw new UserException("Read Clipper cannot soft clip unmapped reads");
      }

      int myStop = stop;
      if ((stop + 1 - start) == read.getReadLength()) {
        // BAM representation issue -- we can't SOFTCLIP away all bases in a read, so leave
        // one base in place
        myStop--; // just decrement stop
      }

      if (start > 0 && myStop != read.getReadLength() - 1)
        throw new RuntimeException(
            String.format(
                "Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d",
                read.getReadName(), start, myStop));

      Cigar oldCigar = read.getCigar();

      int scLeft = 0, scRight = read.getReadLength();
      if (start == 0) scLeft = myStop + 1;
      else scRight = start;

      Cigar newCigar = softClip(oldCigar, scLeft, scRight);
      read.setCigar(newCigar);

      int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar);
      int newStart = read.getAlignmentStart() + newClippedStart;
      read.setAlignmentStart(newStart);
      break;

    case REVERT_SOFTCLIPPED_BASES:
      read = revertSoftClippedBases(read);
      break;

    default:
      throw new IllegalStateException("Unexpected Clipping operator type " + algorithm);
  }
  return read;
}
/**
 * Hard clip bases from read, from start to stop in base coordinates
 *
 * <p>If start == 0, then we will clip from the front of the read, otherwise we clip from the
 * right. If start == 0 and stop == 10, this would clip out the first 10 bases of the read.
 *
 * <p>Note that this function works with reads with negative alignment starts, in order to allow
 * us to hardClip reads that have had their soft clips reverted and so might have negative
 * alignment starts
 *
 * <p>Works properly with reduced reads and insertion/deletion base qualities
 *
 * @param read a non-null read
 * @param start a start >= 0 and < read.length
 * @param stop a stop >= 0 and < read.length.
 * @return a cloned version of read that has been properly trimmed down
 */
private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) {
  // If the read is unmapped there is no Cigar string and neither should we create a new cigar
  // string
  final CigarShift cigarShift =
      (read.getReadUnmappedFlag())
          ? new CigarShift(new Cigar(), 0, 0)
          : hardClipCigar(read.getCigar(), start, stop);

  // the cigar may force a shift left or right (or both) in case we are left with insertions
  // starting or ending the read after applying the hard clip on start/stop.
  final int newLength =
      read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
  final byte[] newBases = new byte[newLength];
  final byte[] newQuals = new byte[newLength];
  final int copyStart =
      (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;

  System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
  System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);

  final GATKSAMRecord hardClippedRead = (GATKSAMRecord) read.clone();

  // reset the cached soft start and end because they may have changed now that the read was hard
  // clipped. No need to calculate them now; they'll be lazily calculated on the next call to
  // getSoftStart()/getSoftEnd()
  hardClippedRead.resetSoftStartAndEnd();

  hardClippedRead.setBaseQualities(newQuals);
  hardClippedRead.setReadBases(newBases);
  hardClippedRead.setCigar(cigarShift.cigar);
  if (start == 0)
    hardClippedRead.setAlignmentStart(
        read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));

  if (read.hasBaseIndelQualities()) {
    final byte[] newBaseInsertionQuals = new byte[newLength];
    final byte[] newBaseDeletionQuals = new byte[newLength];
    System.arraycopy(
        read.getBaseInsertionQualities(), copyStart, newBaseInsertionQuals, 0, newLength);
    System.arraycopy(
        read.getBaseDeletionQualities(), copyStart, newBaseDeletionQuals, 0, newLength);
    hardClippedRead.setBaseQualities(newBaseInsertionQuals, EventType.BASE_INSERTION);
    hardClippedRead.setBaseQualities(newBaseDeletionQuals, EventType.BASE_DELETION);
  }

  return hardClippedRead;
}
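// Worked example (standalone, illustrative) of the copy arithmetic in hardClip(), ignoring
// cigar-driven shifts: clipping bases 0..3 from a 10-base read keeps 10 - 4 = 6 bases, copied
// starting at index stop + 1 = 4.
public class HardClipArithmeticExample {
  public static void main(String[] args) {
    final int readLength = 10, start = 0, stop = 3;
    final int shiftFromStart = 0, shiftFromEnd = 0; // no insertions left dangling at the edges
    final int newLength = readLength - (stop - start + 1) - shiftFromStart - shiftFromEnd;
    final int copyStart = (start == 0) ? stop + 1 + shiftFromStart : shiftFromStart;
    System.out.println(newLength + " bases copied from index " + copyStart); // 6 ... 4
  }
}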
// TODO: this is bizarre -- this code counts hard clips, but then subtracts them from the read
// length, which already doesn't count hard clips
public static int getNumAlignedBases(final GATKSAMRecord read) {
  return read.getReadLength() - getNumClippedBasesAtStart(read) - getNumClippedBasesAtEnd(read);
}
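// Worked example (illustrative) of the TODO above: for a read with CIGAR 5H10M and no low-quality
// tail, getReadLength() is 10 (hard-clipped bases are not stored in the record), yet
// getNumClippedBasesAtStart() still counts the 5H, so getNumAlignedBases() returns 5 even though
// all 10 stored bases are aligned.
public class NumAlignedBasesExample {
  public static void main(String[] args) {
    final int readLength = 10;    // stored bases only; the 5H prefix is not included
    final int clippedAtStart = 5; // hard clip counted by getNumClippedBasesAtStart()
    final int clippedAtEnd = 0;   // no hard clip or Q2 tail on the right
    System.out.println(readLength - clippedAtStart - clippedAtEnd); // prints 5, not 10
  }
}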
/** Ensure that basic read group splitting works. */
@Test
public void testSplitByReadGroup() {
  SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
  SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2");

  SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
  header.addReadGroup(readGroupOne);
  header.addReadGroup(readGroupTwo);

  GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, 10);
  read1.setAttribute("RG", readGroupOne.getId());
  GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 1, 10);
  read2.setAttribute("RG", readGroupTwo.getId());
  GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header, "read3", 0, 1, 10);
  read3.setAttribute("RG", readGroupOne.getId());
  GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header, "read4", 0, 1, 10);
  read4.setAttribute("RG", readGroupTwo.getId());
  GATKSAMRecord read5 = ArtificialSAMUtils.createArtificialRead(header, "read5", 0, 1, 10);
  read5.setAttribute("RG", readGroupTwo.getId());
  GATKSAMRecord read6 = ArtificialSAMUtils.createArtificialRead(header, "read6", 0, 1, 10);
  read6.setAttribute("RG", readGroupOne.getId());
  GATKSAMRecord read7 = ArtificialSAMUtils.createArtificialRead(header, "read7", 0, 1, 10);
  read7.setAttribute("RG", readGroupOne.getId());

  ReadBackedPileup pileup =
      new ReadBackedPileupImpl(
          null,
          Arrays.asList(read1, read2, read3, read4, read5, read6, read7),
          Arrays.asList(1, 1, 1, 1, 1, 1, 1));

  ReadBackedPileup rg1Pileup = pileup.getPileupForReadGroup("rg1");
  List<GATKSAMRecord> rg1Reads = rg1Pileup.getReads();
  Assert.assertEquals(rg1Reads.size(), 4, "Wrong number of reads in read group rg1");
  Assert.assertEquals(
      rg1Reads.get(0), read1, "Read " + read1.getReadName() + " should be in rg1 but isn't");
  Assert.assertEquals(
      rg1Reads.get(1), read3, "Read " + read3.getReadName() + " should be in rg1 but isn't");
  Assert.assertEquals(
      rg1Reads.get(2), read6, "Read " + read6.getReadName() + " should be in rg1 but isn't");
  Assert.assertEquals(
      rg1Reads.get(3), read7, "Read " + read7.getReadName() + " should be in rg1 but isn't");

  ReadBackedPileup rg2Pileup = pileup.getPileupForReadGroup("rg2");
  List<GATKSAMRecord> rg2Reads = rg2Pileup.getReads();
  Assert.assertEquals(rg2Reads.size(), 3, "Wrong number of reads in read group rg2");
  Assert.assertEquals(
      rg2Reads.get(0), read2, "Read " + read2.getReadName() + " should be in rg2 but isn't");
  Assert.assertEquals(
      rg2Reads.get(1), read4, "Read " + read4.getReadName() + " should be in rg2 but isn't");
  Assert.assertEquals(
      rg2Reads.get(2), read5, "Read " + read5.getReadName() + " should be in rg2 but isn't");
}
@Test
public void testStrandlessReads() {
  final byte[] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
  final byte[] quals = {20, 20, 20, 20, 20, 20, 20, 20};
  // cigar must cover all 8 bases
  GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "8M");

  Assert.assertEquals(read.isStrandless(), false);

  read.setReadNegativeStrandFlag(false);
  Assert.assertEquals(read.isStrandless(), false);
  Assert.assertEquals(read.getReadNegativeStrandFlag(), false);

  read.setReadNegativeStrandFlag(true);
  Assert.assertEquals(read.isStrandless(), false);
  Assert.assertEquals(read.getReadNegativeStrandFlag(), true);

  read.setReadNegativeStrandFlag(true);
  read.setIsStrandless(true);
  Assert.assertEquals(read.isStrandless(), true);
  Assert.assertEquals(
      read.getReadNegativeStrandFlag(),
      false,
      "negative strand flag should return false even though it is set for a strandless read");
}
/** Ensure that splitting read groups still works when dealing with a sample-split pileup. */
@Test
public void testSplitBySample() {
  SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
  readGroupOne.setSample("sample1");
  SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2");
  readGroupTwo.setSample("sample2");

  SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
  header.addReadGroup(readGroupOne);
  header.addReadGroup(readGroupTwo);

  GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, 10);
  read1.setAttribute("RG", readGroupOne.getId());
  GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "read2", 0, 1, 10);
  read2.setAttribute("RG", readGroupTwo.getId());
  GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header, "read3", 0, 1, 10);
  read3.setAttribute("RG", readGroupOne.getId());
  GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header, "read4", 0, 1, 10);
  read4.setAttribute("RG", readGroupTwo.getId());

  ReadBackedPileupImpl sample1Pileup =
      new ReadBackedPileupImpl(null, Arrays.asList(read1, read3), Arrays.asList(1, 1));
  ReadBackedPileupImpl sample2Pileup =
      new ReadBackedPileupImpl(null, Arrays.asList(read2, read4), Arrays.asList(1, 1));
  Map<String, ReadBackedPileupImpl> sampleToPileupMap =
      new HashMap<String, ReadBackedPileupImpl>();
  sampleToPileupMap.put(readGroupOne.getSample(), sample1Pileup);
  sampleToPileupMap.put(readGroupTwo.getSample(), sample2Pileup);

  ReadBackedPileup compositePileup = new ReadBackedPileupImpl(null, sampleToPileupMap);

  ReadBackedPileup rg1Pileup = compositePileup.getPileupForReadGroup("rg1");
  List<GATKSAMRecord> rg1Reads = rg1Pileup.getReads();
  Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg1");
  Assert.assertEquals(
      rg1Reads.get(0), read1, "Read " + read1.getReadName() + " should be in rg1 but isn't");
  Assert.assertEquals(
      rg1Reads.get(1), read3, "Read " + read3.getReadName() + " should be in rg1 but isn't");

  ReadBackedPileup rg2Pileup = compositePileup.getPileupForReadGroup("rg2");
  List<GATKSAMRecord> rg2Reads = rg2Pileup.getReads();
  // was asserting on rg1Reads.size(), a copy/paste bug
  Assert.assertEquals(rg2Reads.size(), 2, "Wrong number of reads in read group rg2");
  Assert.assertEquals(
      rg2Reads.get(0), read2, "Read " + read2.getReadName() + " should be in rg2 but isn't");
  Assert.assertEquals(
      rg2Reads.get(1), read4, "Read " + read4.getReadName() + " should be in rg2 but isn't");
}
@Override
protected boolean isUsableRead(final GATKSAMRecord read, final int refLoc) {
  return super.isUsableRead(read, refLoc)
      && read.getSoftStart() + read.getCigar().getReadLength() > refLoc;
}
@Override
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
  return !(read.getReadFailsVendorQualityCheckFlag() || read.getReadUnmappedFlag());
}