public double[] computeReadHaplotypeLikelihoods( ReadBackedPileup pileup, HashMap<Allele, Haplotype> haplotypesInVC) { double[][] haplotypeLikehoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()]; double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()]; int i = 0; for (GATKSAMRecord read : pileup.getReads()) { if (ReadUtils.is454Read(read)) { continue; } // for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi)) // = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent int j = 0; for (Map.Entry<Allele, Haplotype> a : haplotypesInVC.entrySet()) { readLikelihoods[i][j] = computeReadLikelihoodGivenHaplotype(a.getValue(), read); if (DEBUG) { System.out.print(read.getReadName() + " "); System.out.format( "%d %d S:%d US:%d E:%d UE:%d C:%s %3.4f\n", i, j, read.getAlignmentStart(), read.getUnclippedStart(), read.getAlignmentEnd(), read.getUnclippedEnd(), read.getCigarString(), readLikelihoods[i][j]); } j++; } i++; } for (i = 0; i < haplotypesInVC.size(); i++) { for (int j = i; j < haplotypesInVC.size(); j++) { // combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j] // L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2) // readLikelihoods[k][j] has log10(Pr(R_k) | H[j] ) double[] readLikelihood = new double[2]; // diploid sample for (int readIdx = 0; readIdx < pileup.getReads().size(); readIdx++) { readLikelihood[0] = -readLikelihoods[readIdx][i] / 10; readLikelihood[1] = -readLikelihoods[readIdx][j] / 10; // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+x0^x2)-log10(2) // First term is approximated by Jacobian log with table lookup. // Second term is a constant added to both likelihoods so will be ignored haplotypeLikehoodMatrix[i][j] += MathUtils.approximateLog10SumLog10(readLikelihood[0], readLikelihood[1]); } } } return getHaplotypeLikelihoods(haplotypeLikehoodMatrix); }
/** * Computes an allele biased version of the given pileup * * @param pileup the original pileup * @param downsamplingFraction the fraction of total reads to remove per allele * @return allele biased pileup */ public static ReadBackedPileup createAlleleBiasedBasePileup( final ReadBackedPileup pileup, final double downsamplingFraction) { // special case removal of all or no reads if (downsamplingFraction <= 0.0) return pileup; if (downsamplingFraction >= 1.0) return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>()); final PileupElementList[] alleleStratifiedElements = new PileupElementList[4]; for (int i = 0; i < 4; i++) alleleStratifiedElements[i] = new PileupElementList(); // start by stratifying the reads by the alleles they represent at this position for (final PileupElement pe : pileup) { final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase()); if (baseIndex != -1) alleleStratifiedElements[baseIndex].add(pe); } // make a listing of allele counts and calculate the total count final int[] alleleCounts = calculateAlleleCounts(alleleStratifiedElements); final int totalAlleleCount = (int) MathUtils.sum(alleleCounts); // do smart down-sampling final int numReadsToRemove = (int) (totalAlleleCount * downsamplingFraction); // floor final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove); final HashSet<PileupElement> readsToRemove = new HashSet<PileupElement>(numReadsToRemove); for (int i = 0; i < 4; i++) { final PileupElementList alleleList = alleleStratifiedElements[i]; // if we don't need to remove any reads, then don't if (alleleCounts[i] > targetAlleleCounts[i]) readsToRemove.addAll( downsampleElements( alleleList, alleleCounts[i], alleleCounts[i] - targetAlleleCounts[i])); } // we need to keep the reads sorted because the FragmentUtils code will expect them in // coordinate order and will fail otherwise final List<PileupElement> readsToKeep = new ArrayList<PileupElement>(totalAlleleCount - 
numReadsToRemove); for (final PileupElement pe : pileup) { if (!readsToRemove.contains(pe)) { readsToKeep.add(pe); } } return new ReadBackedPileupImpl( pileup.getLocation(), new ArrayList<PileupElement>(readsToKeep)); }
/**
 * Examines one locus for evidence of a variation event worth realigning around.
 *
 * <p>Evidence comes from two sources: (1) known variant rods bound to {@code known} in the
 * tracker (indels, SNPs, mixed records), and (2) the read pileup itself (deletions, bases
 * preceding insertions, and — when {@code lookForMismatchEntropy} is set — a high
 * quality-weighted mismatch rate). Returns {@code null} when no event is found or when no
 * usable reads/rods establish an event extent.
 *
 * @param tracker reference-ordered data at this locus; may be null
 * @param ref     the reference context (supplies the reference base)
 * @param context the alignment context (supplies the base pileup and location)
 * @return an Event spanning the locus and the furthest read/rod stop position, or null
 */
public Event map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    boolean hasIndel = false;
    boolean hasInsertion = false;
    boolean hasPointEvent = false;
    // furthest right-hand extent of any supporting read or rod; -1 means "none seen yet"
    int furthestStopPos = -1;

    // look at the rods for indels or SNPs
    if (tracker != null) {
        for (VariantContext vc : tracker.getValues(known)) {
            switch (vc.getType()) {
                case INDEL:
                    hasIndel = true;
                    if (vc.isSimpleInsertion()) hasInsertion = true;
                    break;
                case SNP:
                    hasPointEvent = true;
                    break;
                case MIXED:
                    // mixed records count as both a point event and an indel
                    hasPointEvent = true;
                    hasIndel = true;
                    if (vc.isSimpleInsertion()) hasInsertion = true;
                    break;
                default:
                    break;
            }
            // NOTE(review): once hasIndel latches true, every subsequent rod (including pure
            // SNPs) updates furthestStopPos — presumably intentional to extend the interval,
            // but worth confirming against the target-creator's design.
            if (hasIndel) furthestStopPos = vc.getEnd();
        }
    }

    // look at the normal context to get deletions and positions with high entropy
    final ReadBackedPileup pileup = context.getBasePileup();

    // quality-weighted mismatch tally used for the entropy check below
    int mismatchQualities = 0, totalQualities = 0;
    final byte refBase = ref.getBase();
    for (PileupElement p : pileup) {
        // check the ends of the reads to see how far they extend
        furthestStopPos = Math.max(furthestStopPos, p.getRead().getAlignmentEnd());

        // is it a deletion or insertion?
        if (p.isDeletion() || p.isBeforeInsertion()) {
            hasIndel = true;
            if (p.isBeforeInsertion()) hasInsertion = true;
        }

        // look for mismatches (only tallied when entropy checking is enabled)
        else if (lookForMismatchEntropy) {
            if (p.getBase() != refBase) mismatchQualities += p.getQual();
            totalQualities += p.getQual();
        }
    }

    // make sure we're supposed to look for high entropy; if totalQualities is 0 the division
    // yields NaN, the >= comparison is false, and no point event is declared
    if (lookForMismatchEntropy
            && pileup.getNumberOfElements() >= minReadsAtLocus
            && (double) mismatchQualities / (double) totalQualities >= mismatchThreshold)
        hasPointEvent = true;

    // return null if no event occurred
    if (!hasIndel && !hasPointEvent) return null;

    // return null if we didn't find any usable reads/rods associated with the event
    if (furthestStopPos == -1) return null;

    GenomeLoc eventLoc = context.getLocation();
    // widen a single-base locus to a 2-bp interval so the insertion point is covered
    if (hasInsertion)
        eventLoc =
                getToolkit()
                        .getGenomeLocParser()
                        .createGenomeLoc(eventLoc.getContig(), eventLoc.getStart(), eventLoc.getStart() + 1);

    // classify: indel-only, point-only, or both
    EVENT_TYPE eventType =
            (hasIndel
                    ? (hasPointEvent ? EVENT_TYPE.BOTH : EVENT_TYPE.INDEL_EVENT)
                    : EVENT_TYPE.POINT_EVENT);

    return new Event(eventLoc, furthestStopPos, eventType);
}