public double[] computeReadHaplotypeLikelihoods( ReadBackedPileup pileup, HashMap<Allele, Haplotype> haplotypesInVC) { double[][] haplotypeLikehoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()]; double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()]; int i = 0; for (GATKSAMRecord read : pileup.getReads()) { if (ReadUtils.is454Read(read)) { continue; } // for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi)) // = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent int j = 0; for (Map.Entry<Allele, Haplotype> a : haplotypesInVC.entrySet()) { readLikelihoods[i][j] = computeReadLikelihoodGivenHaplotype(a.getValue(), read); if (DEBUG) { System.out.print(read.getReadName() + " "); System.out.format( "%d %d S:%d US:%d E:%d UE:%d C:%s %3.4f\n", i, j, read.getAlignmentStart(), read.getUnclippedStart(), read.getAlignmentEnd(), read.getUnclippedEnd(), read.getCigarString(), readLikelihoods[i][j]); } j++; } i++; } for (i = 0; i < haplotypesInVC.size(); i++) { for (int j = i; j < haplotypesInVC.size(); j++) { // combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j] // L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2) // readLikelihoods[k][j] has log10(Pr(R_k) | H[j] ) double[] readLikelihood = new double[2]; // diploid sample for (int readIdx = 0; readIdx < pileup.getReads().size(); readIdx++) { readLikelihood[0] = -readLikelihoods[readIdx][i] / 10; readLikelihood[1] = -readLikelihoods[readIdx][j] / 10; // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+x0^x2)-log10(2) // First term is approximated by Jacobian log with table lookup. // Second term is a constant added to both likelihoods so will be ignored haplotypeLikehoodMatrix[i][j] += MathUtils.approximateLog10SumLog10(readLikelihood[0], readLikelihood[1]); } } } return getHaplotypeLikelihoods(haplotypeLikehoodMatrix); }
/** * Get the most likely alleles estimated across all reads in this object * * <p>Takes the most likely two alleles according to their diploid genotype likelihoods. That is, * for each allele i and j we compute p(D | i,j) where D is the read likelihoods. We track the * maximum i,j likelihood and return an object that contains the alleles i and j as well as the * max likelihood. * * <p>Note that the second most likely diploid genotype is not tracked so the resulting * MostLikelyAllele doesn't have a meaningful get best likelihood. * * @return a MostLikelyAllele object, or null if this map is empty */ public MostLikelyAllele getMostLikelyDiploidAlleles() { if (isEmpty()) return null; int hap1 = 0; int hap2 = 0; double maxElement = Double.NEGATIVE_INFINITY; for (int iii = 0; iii < alleles.size(); iii++) { final Allele iii_allele = alleles.get(iii); for (int jjj = 0; jjj <= iii; jjj++) { final Allele jjj_allele = alleles.get(jjj); double haplotypeLikelihood = 0.0; for (final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : likelihoodReadMap.entrySet()) { // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2) final double likelihood_iii = entry.getValue().get(iii_allele); final double likelihood_jjj = entry.getValue().get(jjj_allele); haplotypeLikelihood += MathUtils.approximateLog10SumLog10(likelihood_iii, likelihood_jjj) + MathUtils.LOG_ONE_HALF; // fast exit. If this diploid pair is already worse than the max, just stop and look at // the next pair if (haplotypeLikelihood < maxElement) break; } // keep track of the max element and associated indices if (haplotypeLikelihood > maxElement) { hap1 = iii; hap2 = jjj; maxElement = haplotypeLikelihood; } } } if (maxElement == Double.NEGATIVE_INFINITY) throw new IllegalStateException( "max likelihood is " + maxElement + " indicating something has gone wrong"); return new MostLikelyAllele(alleles.get(hap1), alleles.get(hap2), maxElement, maxElement); }