public static String encodeBQTag(SAMRecord read, byte[] baq) { // Offset to base alignment quality (BAQ), of the same length as the read sequence. // At the i-th read base, BAQi = Qi - (BQi - 64) where Qi is the i-th base quality. // so BQi = Qi - BAQi + 64 byte[] bqTag = new byte[baq.length]; for (int i = 0; i < bqTag.length; i++) { final int bq = (int) read.getBaseQualities()[i] + 64; final int baq_i = (int) baq[i]; final int tag = bq - baq_i; // problem with the calculation of the correction factor; this is our problem if (tag < 0) throw new ReviewedGATKException( "BAQ tag calculation error. BAQ value above base quality at " + read); // the original quality is too high, almost certainly due to using the wrong encoding in the // BAM file if (tag > Byte.MAX_VALUE) throw new UserException.MisencodedBAM( read, "we encountered an extremely high quality score (" + (int) read.getBaseQualities()[i] + ") with BAQ correction factor of " + baq_i); bqTag[i] = (byte) tag; } return new String(bqTag); }
/** * Returns a new qual array for read that includes the BAQ adjustment. Does not support on-the-fly * BAQ calculation * * @param read the SAMRecord to operate on * @param overwriteOriginalQuals If true, we replace the original qualities scores in the read * with their BAQ'd version * @param useRawQualsIfNoBAQTag If useRawQualsIfNoBAQTag is true, then if there's no BAQ * annotation we just use the raw quality scores. Throws IllegalStateException is false and no * BAQ tag is present * @return */ public static byte[] calcBAQFromTag( SAMRecord read, boolean overwriteOriginalQuals, boolean useRawQualsIfNoBAQTag) { byte[] rawQuals = read.getBaseQualities(); byte[] newQuals = rawQuals; byte[] baq = getBAQTag(read); if (baq != null) { // Offset to base alignment quality (BAQ), of the same length as the read sequence. // At the i-th read base, BAQi = Qi - (BQi - 64) where Qi is the i-th base quality. newQuals = overwriteOriginalQuals ? rawQuals : new byte[rawQuals.length]; for (int i = 0; i < rawQuals.length; i++) { int rawQual = (int) rawQuals[i]; int baq_delta = (int) baq[i] - 64; int newval = rawQual - baq_delta; if (newval < 0) throw new UserException.MalformedBAM( read, "BAQ tag error: the BAQ value is larger than the base quality"); newQuals[i] = (byte) newval; } } else if (!useRawQualsIfNoBAQTag) { throw new IllegalStateException( "Required BAQ tag to be present, but none was on read " + read.getReadName()); } return newQuals; }
/** Note: this is the only getKey function that handles unmapped reads specially! */ public static long getKey(final SAMRecord rec) { final int refIdx = rec.getReferenceIndex(); final int start = rec.getAlignmentStart(); if (!(rec.getReadUnmappedFlag() || refIdx < 0 || start < 0)) return getKey(refIdx, start); // Put unmapped reads at the end, but don't give them all the exact same // key so that they can be distributed to different reducers. // // A random number would probably be best, but to ensure that the same // record always gets the same key we use a fast hash instead. // // We avoid using hashCode(), because it's not guaranteed to have the // same value across different processes. int hash = 0; byte[] var; if ((var = rec.getVariableBinaryRepresentation()) != null) { // Undecoded BAM record: just hash its raw data. hash = (int) MurmurHash3.murmurhash3(var, hash); } else { // Decoded BAM record or any SAM record: hash a few representative // fields together. hash = (int) MurmurHash3.murmurhash3(rec.getReadName(), hash); hash = (int) MurmurHash3.murmurhash3(rec.getReadBases(), hash); hash = (int) MurmurHash3.murmurhash3(rec.getBaseQualities(), hash); hash = (int) MurmurHash3.murmurhash3(rec.getCigarString(), hash); } return getKey0(Integer.MAX_VALUE, hash); }
/** * @param read a read containing the variant * @return the number of hard clipped and low qual bases at the read start (where start is the * leftmost end w.r.t. the reference) */ public static int getNumClippedBasesAtStart(final SAMRecord read) { // check for hard clips (never consider these bases): final Cigar c = read.getCigar(); final CigarElement first = c.getCigarElement(0); int numStartClippedBases = 0; if (first.getOperator() == CigarOperator.H) { numStartClippedBases = first.getLength(); } final byte[] unclippedReadBases = read.getReadBases(); final byte[] unclippedReadQuals = read.getBaseQualities(); // Do a stricter base clipping than provided by CIGAR string, since this one may be too // conservative, // and may leave a string of Q2 bases still hanging off the reads. // TODO: this code may not even get used because HaplotypeCaller already hard clips low quality // tails for (int i = numStartClippedBases; i < unclippedReadBases.length; i++) { if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD) numStartClippedBases++; else break; } return numStartClippedBases; }
/** * HACK TO CREATE GATKSAMRECORD BASED ONLY A SAMRECORD FOR TESTING PURPOSES ONLY * * @param read */ public GATKSAMRecord(final SAMRecord read) { super(read.getHeader()); super.setReferenceIndex(read.getReferenceIndex()); super.setAlignmentStart(read.getAlignmentStart()); super.setReadName(read.getReadName()); super.setMappingQuality(read.getMappingQuality()); // indexing bin done below super.setCigar(read.getCigar()); super.setFlags(read.getFlags()); super.setMateReferenceIndex(read.getMateReferenceIndex()); super.setMateAlignmentStart(read.getMateAlignmentStart()); super.setInferredInsertSize(read.getInferredInsertSize()); SAMReadGroupRecord samRG = read.getReadGroup(); SAMBinaryTagAndValue samAttr = GATKBin.getReadBinaryAttributes(read); if (samAttr == null) { clearAttributes(); } else { setAttributes(samAttr); } if (samRG != null) { GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG); setReadGroup(rg); } super.setFileSource(read.getFileSource()); super.setReadName(read.getReadName()); super.setCigarString(read.getCigarString()); super.setReadBases(read.getReadBases()); super.setBaseQualities(read.getBaseQualities()); // From SAMRecord constructor: Do this after the above because setCigarString will clear it. GATKBin.setReadIndexingBin(this, GATKBin.getReadIndexingBin(read)); }
/** * Modifies read in place so that the base quality scores are capped by the BAQ calculation. Uses * the BAQ tag if present already and alwaysRecalculate is false, otherwise fires up the HMM and * does the BAQ on the fly using the refReader to obtain the reference bases as needed. * * @param read * @param refReader * @param calculationType * @return BQ qualities for use, in case qmode is DONT_MODIFY */ public byte[] baqRead( SAMRecord read, IndexedFastaSequenceFile refReader, CalculationMode calculationType, QualityMode qmode) { if (DEBUG) System.out.printf("BAQ %s read %s%n", calculationType, read.getReadName()); byte[] BAQQuals = read.getBaseQualities(); // in general we are overwriting quals, so just get a pointer to // them if (calculationType == CalculationMode.OFF) { // we don't want to do anything ; // just fall though } else if (excludeReadFromBAQ(read)) {; // just fall through } else { final boolean readHasBAQTag = hasBAQTag(read); if (calculationType == CalculationMode.RECALCULATE || !readHasBAQTag) { if (DEBUG) System.out.printf(" Calculating BAQ on the fly%n"); BAQCalculationResult hmmResult = calcBAQFromHMM(read, refReader); if (hmmResult != null) { switch (qmode) { case ADD_TAG: addBAQTag(read, hmmResult.bq); break; case OVERWRITE_QUALS: System.arraycopy(hmmResult.bq, 0, read.getBaseQualities(), 0, hmmResult.bq.length); break; case DONT_MODIFY: BAQQuals = hmmResult.bq; break; default: throw new ReviewedGATKException("BUG: unexpected qmode " + qmode); } } else if (readHasBAQTag) { // remove the BAQ tag if it's there because we cannot trust it read.setAttribute(BAQ_TAG, null); } } else if (qmode == QualityMode.OVERWRITE_QUALS) { // only makes sense if we are overwriting quals if (DEBUG) System.out.printf(" Taking BAQ from tag%n"); // this overwrites the original qualities calcBAQFromTag(read, true, false); } } return BAQQuals; }
/**
 * Runs a read with over-high quality scores through {@code
 * MisencodedBaseQualityReadTransformer} and checks that every quality comes out 31 lower.
 */
@Test
public void testFixBadQuals() {
  // Each value here is exactly 31 above its counterpart in the corrected array below.
  final byte[] misencodedQuals = {59, 60, 62, 63, 64, 61, 62, 58, 57, 56};
  final byte[] correctedQuals = {28, 29, 31, 32, 33, 30, 31, 27, 26, 25};

  final SAMRecord input = createRead(misencodedQuals);
  final ReadTransformer transformer = new MisencodedBaseQualityReadTransformer();
  final SAMRecord output = transformer.apply(input);

  Assert.assertEquals(correctedQuals, output.getBaseQualities());
}
// we need to pad ref by at least the bandwidth / 2 on either side public BAQCalculationResult calcBAQFromHMM(SAMRecord read, byte[] ref, int refOffset) { // todo -- need to handle the case where the cigar sum of lengths doesn't cover the whole read Pair<Integer, Integer> queryRange = calculateQueryRange(read); if (queryRange == null) return null; // read has Ns, or is completely clipped away int queryStart = queryRange.getFirst(); int queryEnd = queryRange.getSecond(); BAQCalculationResult baqResult = calcBAQFromHMM(ref, read.getReadBases(), read.getBaseQualities(), queryStart, queryEnd); // cap quals int readI = 0, refI = 0; for (CigarElement elt : read.getCigar().getCigarElements()) { int l = elt.getLength(); switch (elt.getOperator()) { case N: // cannot handle these return null; case H: case P: // ignore pads and hard clips break; case S: refI += l; // move the reference too, in addition to I case I: // todo -- is it really the case that we want to treat I and S the same? for (int i = readI; i < readI + l; i++) baqResult.bq[i] = baqResult.rawQuals[i]; readI += l; break; case D: refI += l; break; case M: for (int i = readI; i < readI + l; i++) { int expectedPos = refI - refOffset + (i - readI); baqResult.bq[i] = capBaseByBAQ( baqResult.rawQuals[i], baqResult.bq[i], baqResult.state[i], expectedPos); } readI += l; refI += l; break; default: throw new ReviewedGATKException( "BUG: Unexpected CIGAR element " + elt + " in read " + read.getReadName()); } } if (readI != read.getReadLength()) // odd cigar string System.arraycopy(baqResult.rawQuals, 0, baqResult.bq, 0, baqResult.bq.length); return baqResult; }
/** * Returns the BAQ adjusted quality score for this read at this offset. Does not support * on-the-fly BAQ calculation * * @param read the SAMRecord to operate on * @param offset the offset of operate on * @param useRawQualsIfNoBAQTag If useRawQualsIfNoBAQTag is true, then if there's no BAQ * annotation we just use the raw quality scores. Throws IllegalStateException is false and no * BAQ tag is present * @return */ public static byte calcBAQFromTag(SAMRecord read, int offset, boolean useRawQualsIfNoBAQTag) { byte rawQual = read.getBaseQualities()[offset]; byte newQual = rawQual; byte[] baq = getBAQTag(read); if (baq != null) { // Offset to base alignment quality (BAQ), of the same length as the read sequence. // At the i-th read base, BAQi = Qi - (BQi - 64) where Qi is the i-th base quality. int baq_delta = (int) baq[offset] - 64; int newval = rawQual - baq_delta; if (newval < 0) throw new UserException.MalformedBAM( read, "BAQ tag error: the BAQ value is larger than the base quality"); newQual = (byte) newval; } else if (!useRawQualsIfNoBAQTag) { throw new IllegalStateException( "Required BAQ tag to be present, but none was on read " + read.getReadName()); } return newQual; }
public double computeReadLikelihoodGivenHaplotype(Haplotype haplotype, SAMRecord read) { long numStartClippedBases = 0; long numEndClippedBases = 0; byte[] unclippedReadQuals = read.getBaseQualities(); byte[] unclippedReadBases = read.getReadBases(); // Do a stricter base clipping than provided by CIGAR string, since this one may be too // conservative, // and may leave a string of Q2 bases still hanging off the reads. for (int i = 0; i < read.getReadLength(); i++) { if (unclippedReadQuals[i] < BASE_QUAL_THRESHOLD) numStartClippedBases++; else break; } for (int i = read.getReadLength() - 1; i >= 0; i--) { if (unclippedReadQuals[i] < BASE_QUAL_THRESHOLD) numEndClippedBases++; else break; } // System.out.format("numstart: %d numend: %d\n", numStartClippedBases, numEndClippedBases); if (numStartClippedBases + numEndClippedBases >= read.getReadLength()) { return 0; /// Double.POSITIVE_INFINITY; } byte[] readBases = Arrays.copyOfRange( unclippedReadBases, (int) numStartClippedBases, (int) (read.getReadBases().length - numEndClippedBases)); byte[] readQuals = Arrays.copyOfRange( unclippedReadQuals, (int) numStartClippedBases, (int) (read.getReadBases().length - numEndClippedBases)); int readLength = readBases.length; // initialize path metric and traceback memories for Viterbi computation pathMetricArray = new double[readLength + 1][PATH_METRIC_TABLE_LENGTH]; bestStateIndexArray = new int[readLength + 1][PATH_METRIC_TABLE_LENGTH]; for (int k = 1; k < PATH_METRIC_TABLE_LENGTH; k++) pathMetricArray[0][k] = 0; /* if (doSimpleCalculationModel) { // No Viterbi algorithm - assume no sequencing indel artifacts, // so we can collapse computations and pr(read | haplotype) is just probability of observing overlap // of read with haplotype. 
int haplotypeIndex = initialIndexInHaplotype; double c = 0.0;//deletionErrorProbabilities[1] +logOneMinusInsertionStartProbability; // compute likelihood of portion of base to the left of the haplotype for (int indR=readStartIdx-1; indR >= 0; indR--) { byte readBase = readBases[indR]; byte readQual = readQuals[indR]; if (readQual <= 2) continue; double pBaseRead = getProbabilityOfReadBaseGivenXandI((byte)0, readBase, readQual, LEFT_ALIGN_INDEX, 0); // pBaseRead has -10*log10(Prob(base[i]|haplotype[i]) pRead += pBaseRead; } //System.out.format("\nSt: %d Pre-Likelihood:%f\n",readStartIdx, pRead); for (int indR=readStartIdx; indR < readBases.length; indR++) { byte readBase = readBases[indR]; byte readQual = readQuals[indR]; byte haplotypeBase; if (haplotypeIndex < RIGHT_ALIGN_INDEX) haplotypeBase = haplotype.getBases()[haplotypeIndex]; else haplotypeBase = (byte)0; // dummy double pBaseRead = getProbabilityOfReadBaseGivenXandI(haplotypeBase, readBase, readQual, haplotypeIndex, 0); if (haplotypeBase != 0) pBaseRead += c; // pBaseRead has -10*log10(Prob(base[i]|haplotype[i]) if (readQual > 3) pRead += pBaseRead; haplotypeIndex++; if (haplotypeIndex >= haplotype.getBases().length) haplotypeIndex = RIGHT_ALIGN_INDEX; //System.out.format("H:%c R:%c RQ:%d HI:%d %4.5f %4.5f\n", haplotypeBase, readBase, (int)readQual, haplotypeIndex, pBaseRead, pRead); } //System.out.format("\nSt: %d Post-Likelihood:%f\n",readStartIdx, pRead); if (DEBUG) { System.out.println(read.getReadName()); System.out.print("Haplotype:"); for (int k=0; k <haplotype.getBases().length; k++) { System.out.format("%c ", haplotype.getBases()[k]); } System.out.println(); System.out.print("Read bases: "); for (int k=0; k <readBases.length; k++) { System.out.format("%c ", readBases[k]); } System.out.format("\nLikelihood:%f\n",pRead); } if (read.getReadName().contains("106880")) { System.out.println("aca"); System.out.println("Haplotype:"); for (int k=initialIndexInHaplotype; k <haplotype.getBases().length; k++) { 
System.out.format("%c ", haplotype.getBases()[k]); } System.out.println(); System.out.println("Read bases: "); for (int k=readStartIdx; k <readBases.length; k++) { System.out.format("%c ", readBases[k]); } } return pRead; } */ // Update path metric computations based on branch metric (Add/Compare/Select operations) // do forward direction first, ie from anchor to end of read // outer loop for (int indR = 0; indR < readLength; indR++) { byte readBase = readBases[indR]; byte readQual = readQuals[indR]; for (int indX = LEFT_ALIGN_INDEX; indX <= RIGHT_ALIGN_INDEX; indX++) { byte haplotypeBase; if (indX > LEFT_ALIGN_INDEX && indX < RIGHT_ALIGN_INDEX) haplotypeBase = haplotype.getBases()[indX - 1]; else haplotypeBase = readBase; updatePathMetrics(haplotypeBase, indX, indR, readBase, readQual); } } // for debugging only: compute backtracking to find optimal route through trellis. Since I'm // only interested // in log-likelihood of best state, this isn't really necessary. double bestMetric = MathUtils.arrayMin(pathMetricArray[readLength]); if (DEBUG) { System.out.println(read.getReadName()); System.out.print("Haplotype:"); for (int k = 0; k < haplotype.getBases().length; k++) { System.out.format("%c ", haplotype.getBases()[k]); } System.out.println(); System.out.print("Read bases: "); for (int k = 0; k < readBases.length; k++) { System.out.format("%c ", readBases[k]); } System.out.println(); System.out.print("Read quals: "); for (int k = 0; k < readQuals.length; k++) { System.out.format("%d ", (int) readQuals[k]); } System.out.println(); // start from last position of read, go backwards to find optimal alignment int[] bestIndexArray = new int[readLength]; int bestIndex = MathUtils.minElementIndex(pathMetricArray[readLength]); bestIndexArray[readLength - 1] = bestIndex; for (int k = readLength - 2; k >= 0; k--) { bestIndex = bestStateIndexArray[k][bestIndex]; bestIndexArray[k] = bestIndex; } System.out.print("Alignment: "); for (int k = 0; k < readBases.length; k++) { 
System.out.format("%d ", bestIndexArray[k]); } System.out.println(); } // now just take optimum along all path metrics: that's the log likelihood of best alignment if (DEBUG) System.out.format("Likelihood: %5.4f\n", bestMetric); return bestMetric; }
/** * Main method for the program. Checks that all input files are present and readable and that the * output file can be written to. Then iterates through all the records accumulating metrics. * Finally writes metrics file */ protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT); final Histogram<Integer> mismatchesHist = new Histogram<Integer>("Predicted", "Mismatches"); final Histogram<Integer> totalHist = new Histogram<Integer>("Predicted", "Total_Bases"); final Map<String, Histogram> mismatchesByTypeHist = new HashMap<String, Histogram>(); final Map<String, Histogram> totalByTypeHist = new HashMap<String, Histogram>(); // Set up the histograms byte[] bases = {'A', 'C', 'G', 'T'}; for (final byte base : bases) { final Histogram<Integer> h = new Histogram<Integer>("Predicted", (char) base + ">"); mismatchesByTypeHist.put((char) base + ">", h); final Histogram<Integer> h2 = new Histogram<Integer>("Predicted", ">" + (char) base); mismatchesByTypeHist.put(">" + (char) base, h2); } for (final byte base : bases) { final Histogram<Integer> h = new Histogram<Integer>("Predicted", (char) base + ">"); totalByTypeHist.put((char) base + ">", h); final Histogram<Integer> h2 = new Histogram<Integer>("Predicted", ">" + (char) base); totalByTypeHist.put(">" + (char) base, h2); } for (final SAMRecord record : reader) { // Ignore these as we don't know the truth if (record.getReadUnmappedFlag() || record.isSecondaryOrSupplementary()) { continue; } final byte[] readBases = record.getReadBases(); final byte[] readQualities = record.getBaseQualities(); final byte[] refBases = SequenceUtil.makeReferenceFromAlignment(record, false); // We've seen stranger things if (readQualities.length != readBases.length) { throw new PicardException( "Missing Qualities (" + readQualities.length + "," + readBases.length + ") : " + 
record.getSAMString()); } if (refBases.length != readBases.length) { throw new PicardException( "The read length did not match the inferred reference length, please check your MD and CIGAR."); } int cycleIndex; // zero-based if (record.getReadNegativeStrandFlag()) { cycleIndex = readBases.length - 1 + CYCLE_OFFSET; } else { cycleIndex = CYCLE_OFFSET; } for (int i = 0; i < readBases.length; i++) { if (-1 == CYCLE || cycleIndex == CYCLE) { if ('-' != refBases[i] && '0' != refBases[i]) { // not insertion and not soft-clipped if (!SequenceUtil.basesEqual(readBases[i], refBases[i])) { // mismatch mismatchesHist.increment((int) readQualities[i]); if (SequenceUtil.isValidBase(refBases[i])) { mismatchesByTypeHist .get((char) refBases[i] + ">") .increment((int) readQualities[i]); } if (SequenceUtil.isValidBase(readBases[i])) { mismatchesByTypeHist .get(">" + (char) readBases[i]) .increment((int) readQualities[i]); } } else { mismatchesHist.increment( (int) readQualities[i], 0); // to make sure the bin will exist } totalHist.increment((int) readQualities[i]); if (SequenceUtil.isValidBase(readBases[i])) { totalByTypeHist.get(">" + (char) readBases[i]).increment((int) readQualities[i]); } if (SequenceUtil.isValidBase(refBases[i])) { totalByTypeHist.get((char) refBases[i] + ">").increment((int) readQualities[i]); } } } cycleIndex += record.getReadNegativeStrandFlag() ? -1 : 1; } } CloserUtil.close(reader); final Histogram<Integer> hist = new Histogram<Integer>("Predicted", "Observed"); double sumOfSquaresError = 0.0; // compute the aggregate phred values for (final Integer key : mismatchesHist.keySet()) { final double numMismatches = mismatchesHist.get(key).getValue(); final double numBases = totalHist.get(key).getValue(); final double phredErr = Math.log10(numMismatches / numBases) * -10.0; sumOfSquaresError += (0 == numMismatches) ? 
0.0 : (key - phredErr) * (key - phredErr); hist.increment(key, phredErr); // make sure the bin will exist for (final byte base : bases) { mismatchesByTypeHist.get(">" + (char) base).increment(key, 0.0); mismatchesByTypeHist.get((char) base + ">").increment(key, 0.0); totalByTypeHist.get(">" + (char) base).increment(key, 0.0); totalByTypeHist.get((char) base + ">").increment(key, 0.0); } } final QualityScoreAccuracyMetrics metrics = new QualityScoreAccuracyMetrics(); metrics.SUM_OF_SQUARE_ERROR = sumOfSquaresError; final MetricsFile<QualityScoreAccuracyMetrics, Integer> out = getMetricsFile(); out.addMetric(metrics); out.addHistogram(hist); for (final byte base : bases) { // >base : histograms for mismatches *to* the given base Histogram<Integer> m = mismatchesByTypeHist.get(">" + (char) base); Histogram<Integer> t = totalByTypeHist.get(">" + (char) base); Histogram<Integer> h = new Histogram<Integer>(m.getBinLabel(), m.getValueLabel()); for (final Integer key : m.keySet()) { final double numMismatches = m.get(key).getValue(); final double numBases = t.get(key).getValue(); final double phredErr = Math.log10(numMismatches / numBases) * -10.0; h.increment(key, phredErr); } out.addHistogram(h); // base> : histograms for mismatches *from* the given base m = mismatchesByTypeHist.get((char) base + ">"); t = totalByTypeHist.get(">" + (char) base); h = new Histogram<Integer>(m.getBinLabel(), m.getValueLabel()); for (final Integer key : m.keySet()) { final double numMismatches = m.get(key).getValue(); final double numBases = t.get(key).getValue(); final double phredErr = Math.log10(numMismatches / numBases) * -10.0; h.increment(key, phredErr); } out.addHistogram(h); } out.addHistogram(mismatchesHist); out.addHistogram(totalHist); out.write(OUTPUT); return 0; }
@Override protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) { // see if the whole read should be skipped if (recordFilter.filterOut(rec)) return; // check read group + library final String library = (rec.getReadGroup() == null) ? UNKNOWN_LIBRARY : getOrElse(rec.getReadGroup().getLibrary(), UNKNOWN_LIBRARY); if (!libraries.contains(library)) { // should never happen if SAM is valid throw new PicardException("Record contains library that is missing from header: " + library); } // set up some constants that don't change in the loop below final int contextFullLength = 2 * CONTEXT_SIZE + 1; final ArtifactCounter counter = artifactCounters.get(library); final byte[] readBases = rec.getReadBases(); final byte[] readQuals; if (USE_OQ) { final byte[] tmp = rec.getOriginalBaseQualities(); readQuals = tmp == null ? rec.getBaseQualities() : tmp; } else { readQuals = rec.getBaseQualities(); } // iterate over aligned positions for (final AlignmentBlock block : rec.getAlignmentBlocks()) { for (int offset = 0; offset < block.getLength(); offset++) { // remember, these are 1-based! final int readPos = block.getReadStart() + offset; final int refPos = block.getReferenceStart() + offset; // skip low BQ sites final byte qual = readQuals[readPos - 1]; if (qual < MINIMUM_QUALITY_SCORE) continue; // skip N bases in read final char readBase = Character.toUpperCase((char) readBases[readPos - 1]); if (readBase == 'N') continue; /** * Skip regions outside of intervals. * * <p>NB: IntervalListReferenceSequenceMask.get() has side-effects which assume that * successive ReferenceSequence's passed to this method will be in-order (e.g. it will break * if you call acceptRead() with chr1, then chr2, then chr1 again). So this only works if * the underlying iteration is coordinate-sorted. 
*/ if (intervalMask != null && !intervalMask.get(ref.getContigIndex(), refPos)) continue; // skip dbSNP sites if (dbSnpMask != null && dbSnpMask.isDbSnpSite(ref.getName(), refPos)) continue; // skip the ends of the reference final int contextStartIndex = refPos - CONTEXT_SIZE - 1; if (contextStartIndex < 0 || contextStartIndex + contextFullLength > ref.length()) continue; // skip contexts with N bases final String context = getRefContext(ref, contextStartIndex, contextFullLength); if (context.contains("N")) continue; // count the base! counter.countRecord(context, readBase, rec); } } }
/**
 * Convenience constructor: delegates to the three-argument constructor, passing the read's base
 * qualities, the read's bases and {@code ref}, in that order.
 *
 * @param read the read whose base qualities and bases are passed on
 * @param ref passed through unchanged as the third argument
 */
public BAQCalculationResult(SAMRecord read, byte[] ref) {
  this(read.getBaseQualities(), read.getReadBases(), ref);
}