public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
    // if all alleles of vc1 are contained in the alleles of vc2, return true
    if (!vc1.getReference().equals(vc2.getReference()))
        return false;

    for (Allele a : vc1.getAlternateAlleles()) {
        if (!vc2.getAlternateAlleles().contains(a))
            return false;
    }

    return true;
}
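// A minimal usage sketch (added for illustration; not part of the original
// source). It assumes the Allele.create(String, boolean) factory and the
// VariantContextBuilder(source, contig, start, stop, alleles) constructor used
// elsewhere in this codebase; contig, positions, and names are made up.
private static void allelesAreSubsetExample() {
    final Allele ref = Allele.create("A", true);
    final Allele altC = Allele.create("C", false);
    final Allele altT = Allele.create("T", false);

    final VariantContext biallelic =
            new VariantContextBuilder("example", "20", 10, 10, Arrays.asList(ref, altC)).make();
    final VariantContext multiallelic =
            new VariantContextBuilder("example", "20", 10, 10, Arrays.asList(ref, altC, altT)).make();

    // {C} is contained in {C, T} and the references match, but not vice versa
    assert allelesAreSubset(biallelic, multiallelic);
    assert !allelesAreSubset(multiallelic, biallelic);
}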
/**
 * Update the attributes map, given the VariantContext, to reflect the proper
 * chromosome-based VCF tags (AC, AN, AF)
 *
 * @param vc                the VariantContext
 * @param attributes        the attributes map to populate; must not be null; may contain old values
 * @param removeStaleValues should we remove stale values from the mapping?
 * @return the attributes map provided as input, returned for programming convenience
 */
public static Map<String, Object> calculateChromosomeCounts(
        VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) {
    final int AN = vc.getCalledChrCount();

    // if everyone is a no-call, remove the old attributes if requested
    if (AN == 0 && removeStaleValues) {
        if (attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY))
            attributes.remove(VCFConstants.ALLELE_COUNT_KEY);
        if (attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY))
            attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY);
        if (attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY))
            attributes.remove(VCFConstants.ALLELE_NUMBER_KEY);
        return attributes;
    }

    if (vc.hasGenotypes()) {
        attributes.put(VCFConstants.ALLELE_NUMBER_KEY, AN);

        // if there are alternate alleles, record the relevant tags
        if (vc.getAlternateAlleles().size() > 0) {
            final ArrayList<String> alleleFreqs = new ArrayList<String>();
            final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
            for (Allele allele : vc.getAlternateAlleles()) {
                int altChromosomes = vc.getCalledChrCount(allele);
                alleleCounts.add(altChromosomes);
                if (AN == 0) {
                    alleleFreqs.add("0.0");
                } else {
                    // todo -- this is a performance problem
                    final String freq = String.format(
                            makePrecisionFormatStringFromDenominatorValue((double) AN),
                            ((double) altChromosomes / (double) AN));
                    alleleFreqs.add(freq);
                }
            }

            attributes.put(VCFConstants.ALLELE_COUNT_KEY,
                    alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
            attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY,
                    alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
        } else {
            attributes.put(VCFConstants.ALLELE_COUNT_KEY, 0);
            attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0);
        }
    }

    return attributes;
}
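// A minimal sketch of the AF arithmetic above (added for illustration; not part
// of the original source). For AN called chromosomes of which AC carry the alt
// allele, AF = AC / AN; with AN = 20 and AC = 3 this returns "0.150". The fixed
// "%.3f" stands in for makePrecisionFormatStringFromDenominatorValue(AN), whose
// exact precision rule is not shown here.
private static String exampleAlleleFrequency(int ac, int an) {
    return an == 0 ? "0.0" : String.format("%.3f", (double) ac / (double) an);
}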
@Requires({"eval != null", "comp != null"}) private EvalCompMatchType doEvalAndCompMatch( final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) { // find all of the matching comps if (comp.getType() != eval.getType()) return EvalCompMatchType.NO_MATCH; // find the comp which matches both the reference allele and alternate allele from eval final Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0); final Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0); if ((altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference()))) return EvalCompMatchType.STRICT; else return requireStrictAlleleMatch ? EvalCompMatchType.NO_MATCH : EvalCompMatchType.LENIENT; }
private boolean haveDifferentAltAlleles(VariantContext eval, VariantContext comp) {
    Collection<Allele> evalAlts = eval.getAlternateAlleles();
    Collection<Allele> compAlts = comp.getAlternateAlleles();
    if (evalAlts.size() != compAlts.size()) {
        return true;
    } else {
        // same size => every alt from eval must be in comp
        for (Allele a : evalAlts) {
            if (!compAlts.contains(a)) {
                // System.out.printf("Different alleles: %s:%d eval=%s comp=%s\n\t\teval=%s\n\t\tcomp=%s%n",
                //                   eval.getChr(), eval.getStart(), eval.getAlleles(),
                //                   comp.getAlleles(), eval, comp);
                return true;
            }
        }
        return false;
    }
}
private Map<String, Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) {
    if (!stratifiedContext.hasBasePileup())
        return null;

    HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
    for (Allele allele : vc.getAlternateAlleles())
        alleleCounts.put(allele.getBases()[0], 0);

    ReadBackedPileup pileup = stratifiedContext.getBasePileup();
    int totalDepth = pileup.size();

    Map<String, Object> map = new HashMap<String, Object>();
    map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

    if (totalDepth == 0)
        return map; // done, cannot compute FA at 0 coverage!!

    int mq0 = 0; // number of "ref" reads that are actually mq0
    for (PileupElement p : pileup) {
        if (p.getMappingQual() == 0) {
            mq0++;
            continue;
        }
        if (alleleCounts.containsKey(p.getBase())) // non-mq0 read and it's an alt
            alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase()) + 1);
    }

    if (mq0 == totalDepth)
        return map; // if all reads are mq0, there is nothing left to do

    // we need to add counts in the correct order
    String[] fracs = new String[alleleCounts.size()];
    for (int i = 0; i < vc.getAlternateAlleles().size(); i++) {
        fracs[i] = String.format("%.3f",
                ((float) alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))
                        / (totalDepth - mq0));
    }

    map.put(getKeyNames().get(1), fracs);
    return map;
}
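// Worked example of the fraction computed above (added for illustration; not
// part of the original source): with 30 reads total, 5 of them MQ0, and 10
// non-MQ0 reads supporting the alt base, the reported fraction is
// 10 / (30 - 5) = 0.400 -- MQ0 reads are excluded from the denominator.
private static String exampleAltFraction(int altCount, int totalDepth, int mq0) {
    return String.format("%.3f", (float) altCount / (totalDepth - mq0));
}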
/**
 * @param other VariantContext whose alternate alleles to compare against
 * @return true if this VariantContext has the same alternate alleles as other, regardless of
 *         ordering. Otherwise returns false.
 */
public boolean hasSameAlternateAllelesAs(VariantContext other) {
    List<Allele> thisAlternateAlleles = getAlternateAlleles();
    List<Allele> otherAlternateAlleles = other.getAlternateAlleles();

    if (thisAlternateAlleles.size() != otherAlternateAlleles.size()) {
        return false;
    }

    for (Allele allele : thisAlternateAlleles) {
        if (!otherAlternateAlleles.contains(allele)) {
            return false;
        }
    }

    return true;
}
public String update1(
        VariantContext vc1,
        RefMetaDataTracker tracker,
        ReferenceContext ref,
        AlignmentContext context) {
    nCalledLoci++;

    // Note from Eric:
    // This is really not correct. What we really want here is a polymorphic vs. monomorphic
    // count (i.e. on the Genotypes). So, in order to maintain consistency with the previous
    // implementation (and the intention of the original author), I've added in a proxy check
    // for monomorphic status here.
    // Protect against the case when vc has only no-calls too - can happen if we stratify by
    // sample and the sample has a single no-call.
    if (vc1.isMonomorphicInSamples()) {
        nRefLoci++;
    } else {
        switch (vc1.getType()) {
            case NO_VARIATION:
                // shouldn't get here
                break;
            case SNP:
                nVariantLoci++;
                nSNPs++;
                if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
                break;
            case MNP:
                nVariantLoci++;
                nMNPs++;
                if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
                break;
            case INDEL:
                nVariantLoci++;
                if (vc1.isSimpleInsertion()) nInsertions++;
                else if (vc1.isSimpleDeletion()) nDeletions++;
                else nComplex++;
                break;
            case MIXED:
                nVariantLoci++;
                nMixed++;
                break;
            case SYMBOLIC:
                nSymbolic++;
                break;
            default:
                throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType());
        }
    }

    String refStr = vc1.getReference().getBaseString().toUpperCase();

    String aaStr = vc1.hasAttribute("ANCESTRALALLELE")
            ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase()
            : null;
    // if (aaStr.equals(".")) {
    //     aaStr = refStr;
    // }

    // ref  aa  alt  class
    // A    C   A    der homozygote
    // A    C   C    anc homozygote
    // A    A   A    ref homozygote
    // A    A   C
    // A    C   A
    // A    C   C

    for (final Genotype g : vc1.getGenotypes()) {
        final String altStr = vc1.getAlternateAlleles().size() > 0
                ? vc1.getAlternateAllele(0).getBaseString().toUpperCase()
                : null;

        switch (g.getType()) {
            case NO_CALL:
                nNoCalls++;
                break;
            case HOM_REF:
                nHomRef++;
                if (aaStr != null && altStr != null && !refStr.equalsIgnoreCase(aaStr)) {
                    nHomDerived++;
                }
                break;
            case HET:
                nHets++;
                break;
            case HOM_VAR:
                nHomVar++;
                if (aaStr != null && altStr != null && !altStr.equalsIgnoreCase(aaStr)) {
                    nHomDerived++;
                }
                break;
            case MIXED:
                break;
            default:
                throw new ReviewedStingException("BUG: Unexpected genotype type: " + g);
        }
    }

    return null; // we don't capture any interesting sites
}
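// Sketch of the derived-homozygote rule above (added for illustration; not part
// of the original source): a HOM_REF genotype counts as derived when the
// reference differs from the ancestral allele, and a HOM_VAR genotype counts as
// derived when the alt differs from the ancestral allele. E.g. ref=A, aa=C:
// HOM_REF (A/A) is derived; ref=A, aa=A, alt=C: HOM_VAR (C/C) is derived.
private static boolean isHomDerived(boolean isHomRef, String refStr, String altStr, String aaStr) {
    return isHomRef ? !refStr.equalsIgnoreCase(aaStr) : !altStr.equalsIgnoreCase(aaStr);
}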
/**
 * add the genotype data
 *
 * @param vc                 the variant context
 * @param genotypeFormatKeys Genotype formatting string
 * @param alleleMap          alleles for this context
 * @throws IOException for writer
 */
private void addGenotypeData(
        VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
        throws IOException {
    for (String sample : mHeader.getGenotypeSamples()) {
        mWriter.write(VCFConstants.FIELD_SEPARATOR);

        Genotype g = vc.getGenotype(sample);
        if (g == null) {
            // TODO -- The VariantContext needs to know what the general ploidy is of the samples
            // TODO -- We shouldn't be assuming diploid genotypes here!
            mWriter.write(VCFConstants.EMPTY_GENOTYPE);
            continue;
        }

        List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
        for (String key : genotypeFormatKeys) {
            if (key.equals(VCFConstants.GENOTYPE_KEY)) {
                if (!g.isAvailable()) {
                    throw new ReviewedStingException(
                            "GTs cannot be missing for some samples if they are available for others in the record");
                }

                writeAllele(g.getAllele(0), alleleMap);
                for (int i = 1; i < g.getPloidy(); i++) {
                    mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
                    writeAllele(g.getAllele(i), alleleMap);
                }
                continue;
            }

            Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;

            // some exceptions
            if (key.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
                if (Math.abs(g.getNegLog10PError() - Genotype.NO_NEG_LOG_10PERROR) < 1e-6)
                    val = VCFConstants.MISSING_VALUE_v4;
                else {
                    val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
                }
            } else if (key.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
                val = g.isFiltered()
                        ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters()))
                        : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
            }

            VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
            if (metaData != null) {
                int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
                if (numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4)) {
                    // If we have a missing field but multiple values are expected, we need to
                    // construct a new string with all fields.
                    // For example, if Number=2, the string has to be ".,."
                    StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
                    for (int i = 1; i < numInFormatField; i++) {
                        sb.append(",");
                        sb.append(VCFConstants.MISSING_VALUE_v4);
                    }
                    val = sb.toString();
                }
            }

            // assume that if key is absent, then the given string encoding suffices
            String outputValue = formatVCFField(val);
            if (outputValue != null)
                attrs.add(outputValue);
        }

        // strip off trailing missing values
        for (int i = attrs.size() - 1; i >= 0; i--) {
            if (isMissingValue(attrs.get(i)))
                attrs.remove(i);
            else
                break;
        }

        for (int i = 0; i < attrs.size(); i++) {
            if (i > 0 || genotypeFormatKeys.contains(VCFConstants.GENOTYPE_KEY))
                mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
            mWriter.write(attrs.get(i));
        }
    }
}
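// Self-contained sketch of the trailing-missing stripping above (added for
// illustration; not part of the original source). "." stands in for
// VCFConstants.MISSING_VALUE_v4: ["0/1", "99", ".", "."] becomes ["0/1", "99"],
// while a missing value in the middle, as in ["0/1", ".", "42"], is kept so
// field positions stay aligned with the FORMAT string.
private static void stripTrailingMissing(final List<String> attrs) {
    for (int i = attrs.size() - 1; i >= 0 && ".".equals(attrs.get(i)); i--)
        attrs.remove(i);
}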
/**
 * add a record to the file
 *
 * @param vc      the Variant Context object
 * @param refBase the ref base used for indels
 * @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT
 *     THE EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE
 *     AFTER THE EVENT INSTEAD)
 */
public void add(VariantContext vc, byte refBase, boolean refBaseShouldBeAppliedToEndOfAlleles) {
    if (mHeader == null)
        throw new IllegalStateException(
                "The VCF Header must be written before records can be added: " + locationString());

    if (doNotWriteGenotypes)
        vc = VariantContext.modifyGenotypes(vc, null);

    try {
        vc = VariantContext.createVariantContextWithPaddedAlleles(
                vc, refBase, refBaseShouldBeAppliedToEndOfAlleles);

        // if we are doing on the fly indexing, add the record ***before*** we write any bytes
        if (indexer != null)
            indexer.addFeature(vc, positionalStream.getPosition());

        Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
        alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup

        // CHROM
        mWriter.write(vc.getChr());
        mWriter.write(VCFConstants.FIELD_SEPARATOR);

        // POS
        mWriter.write(String.valueOf(vc.getStart()));
        mWriter.write(VCFConstants.FIELD_SEPARATOR);

        // ID
        String ID = vc.hasID() ? vc.getID() : VCFConstants.EMPTY_ID_FIELD;
        mWriter.write(ID);
        mWriter.write(VCFConstants.FIELD_SEPARATOR);

        // REF
        alleleMap.put(vc.getReference(), "0");
        String refString = vc.getReference().getDisplayString();
        mWriter.write(refString);
        mWriter.write(VCFConstants.FIELD_SEPARATOR);

        // ALT
        if (vc.isVariant()) {
            Allele altAllele = vc.getAlternateAllele(0);
            alleleMap.put(altAllele, "1");
            String alt = altAllele.getDisplayString();
            mWriter.write(alt);

            for (int i = 1; i < vc.getAlternateAlleles().size(); i++) {
                altAllele = vc.getAlternateAllele(i);
                alleleMap.put(altAllele, String.valueOf(i + 1));
                alt = altAllele.getDisplayString();
                mWriter.write(",");
                mWriter.write(alt);
            }
        } else {
            mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD);
        }
        mWriter.write(VCFConstants.FIELD_SEPARATOR);

        // QUAL
        if (!vc.hasNegLog10PError())
            mWriter.write(VCFConstants.MISSING_VALUE_v4);
        else
            mWriter.write(getQualValue(vc.getPhredScaledQual()));
        mWriter.write(VCFConstants.FIELD_SEPARATOR);

        // FILTER
        String filters = vc.isFiltered()
                ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters()))
                : (filtersWereAppliedToContext || vc.filtersWereApplied()
                        ? VCFConstants.PASSES_FILTERS_v4
                        : VCFConstants.UNFILTERED);
        mWriter.write(filters);
        mWriter.write(VCFConstants.FIELD_SEPARATOR);

        // INFO
        Map<String, String> infoFields = new TreeMap<String, String>();
        for (Map.Entry<String, Object> field : vc.getAttributes().entrySet()) {
            String key = field.getKey();
            if (key.equals(VariantContext.ID_KEY)
                    || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)
                    || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY)
                    || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY))
                continue;

            String outputValue = formatVCFField(field.getValue());
            if (outputValue != null)
                infoFields.put(key, outputValue);
        }
        writeInfoString(infoFields);

        // FORMAT
        if (vc.hasAttribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY)) {
            mWriter.write(VCFConstants.FIELD_SEPARATOR);
            mWriter.write(vc.getAttributeAsString(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, ""));
        } else {
            List<String> genotypeAttributeKeys = new ArrayList<String>();
            if (vc.hasGenotypes()) {
                genotypeAttributeKeys.addAll(calcVCFGenotypeKeys(vc));
            } else if (mHeader.hasGenotypingData()) {
                // this needs to be done in case all samples are no-calls
                genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
            }

            if (genotypeAttributeKeys.size() > 0) {
                String genotypeFormatString =
                        ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys);
                mWriter.write(VCFConstants.FIELD_SEPARATOR);
                mWriter.write(genotypeFormatString);

                addGenotypeData(vc, alleleMap, genotypeAttributeKeys);
            }
        }

        mWriter.write("\n");
        mWriter.flush(); // necessary so that writing to an output stream will work
    } catch (IOException e) {
        throw new RuntimeException("Unable to write the VCF object to " + locationString());
    }
}
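// Sketch of the allele numbering scheme that alleleMap encodes above (added for
// illustration; not part of the original source): the reference allele maps to
// "0" and alternates to "1", "2", ... in ALT-column order, so for alleles
// {A(ref), C, T} a genotype A/C is written "0/1" and C/T is written "1/2".
private static Map<String, String> exampleAlleleIndexMap(final String ref, final List<String> alts) {
    final Map<String, String> map = new LinkedHashMap<String, String>();
    map.put(ref, "0");
    for (int i = 0; i < alts.size(); i++)
        map.put(alts.get(i), String.valueOf(i + 1));
    return map;
}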
public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
    // see if we need to trim common reference base from all alleles
    boolean trimVC;

    // We need to trim common reference base from all alleles in all genotypes if a ref base is
    // common to all alleles
    Allele refAllele = inputVC.getReference();
    if (!inputVC.isVariant())
        trimVC = false;
    else if (refAllele.isNull())
        trimVC = false;
    else {
        trimVC = (AbstractVCFCodec.computeForwardClipping(
                new ArrayList<Allele>(inputVC.getAlternateAlleles()),
                inputVC.getReference().getDisplayString()) > 0);
    }

    // nothing to do if we don't need to trim bases
    if (trimVC) {
        List<Allele> alleles = new ArrayList<Allele>();
        GenotypesContext genotypes = GenotypesContext.create();

        // set the reference base for indels in the attributes
        Map<String, Object> attributes = new TreeMap<String, Object>(inputVC.getAttributes());

        Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();

        for (Allele a : inputVC.getAlleles()) {
            if (a.isSymbolic()) {
                alleles.add(a);
                originalToTrimmedAlleleMap.put(a, a);
            } else {
                // get bases for current allele and create a new one with trimmed bases
                byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
                Allele trimmedAllele = Allele.create(newBases, a.isReference());
                alleles.add(trimmedAllele);
                originalToTrimmedAlleleMap.put(a, trimmedAllele);
            }
        }

        // detect case where we're trimming bases but resulting vc doesn't have any null allele.
        // In that case, we keep the original representation
        // example: mixed records such as {TA*,TGA,TG}
        boolean hasNullAlleles = false;

        for (Allele a : originalToTrimmedAlleleMap.values()) {
            if (a.isNull())
                hasNullAlleles = true;
            if (a.isReference())
                refAllele = a;
        }

        if (!hasNullAlleles)
            return inputVC;

        // now we can recreate new genotypes with trimmed alleles
        for (final Genotype genotype : inputVC.getGenotypes()) {
            List<Allele> originalAlleles = genotype.getAlleles();
            List<Allele> trimmedAlleles = new ArrayList<Allele>();
            for (Allele a : originalAlleles) {
                if (a.isCalled())
                    trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
                else
                    trimmedAlleles.add(Allele.NO_CALL);
            }
            genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
        }

        final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
        return builder
                .alleles(alleles)
                .genotypes(genotypes)
                .attributes(attributes)
                .referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0]))
                .make();
    }

    return inputVC;
}
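// Pure-string sketch of the forward trimming above (added for illustration; not
// part of the original source): a deletion such as {ref=TA, alt=T} trims to
// {ref=A, alt=""}, yielding the null alt allele the method requires before it
// commits to trimming; a mixed record such as {ref=TA, alts=TGA,TG} would trim
// to {A, GA, G} with no null allele, so the original representation is kept.
private static String trimLeadingBase(final String allele) {
    return allele.isEmpty() ? allele : allele.substring(1);
}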
private List<Haplotype> computeHaplotypes(
        final ReadBackedPileup pileup, final int contextSize, final int locus, final VariantContext vc) {
    // Compute all possible haplotypes consistent with current pileup
    int haplotypesToCompute = vc.getAlternateAlleles().size() + 1;

    final PriorityQueue<Haplotype> candidateHaplotypeQueue =
            new PriorityQueue<Haplotype>(100, new HaplotypeComparator());
    final PriorityQueue<Haplotype> consensusHaplotypeQueue =
            new PriorityQueue<Haplotype>(MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER, new HaplotypeComparator());

    for (final PileupElement p : pileup) {
        final Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize, locus);
        candidateHaplotypeQueue.add(haplotypeFromRead);
    }

    // Now that priority queue has been built with all reads at context, we need to merge and
    // find possible segregating haplotypes
    Haplotype elem;
    while ((elem = candidateHaplotypeQueue.poll()) != null) {
        boolean foundHaplotypeMatch = false;
        Haplotype lastCheckedHaplotype = null;
        for (final Haplotype haplotypeFromList : consensusHaplotypeQueue) {
            final Haplotype consensusHaplotype = getConsensusHaplotype(elem, haplotypeFromList);
            if (consensusHaplotype != null) {
                foundHaplotypeMatch = true;
                if (consensusHaplotype.getQualitySum() > haplotypeFromList.getQualitySum()) {
                    consensusHaplotypeQueue.remove(haplotypeFromList);
                    consensusHaplotypeQueue.add(consensusHaplotype);
                }
                break;
            } else {
                lastCheckedHaplotype = haplotypeFromList;
            }
        }

        if (!foundHaplotypeMatch
                && consensusHaplotypeQueue.size() < MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER) {
            consensusHaplotypeQueue.add(elem);
        } else if (!foundHaplotypeMatch
                && lastCheckedHaplotype != null
                && elem.getQualitySum() > lastCheckedHaplotype.getQualitySum()) {
            consensusHaplotypeQueue.remove(lastCheckedHaplotype);
            consensusHaplotypeQueue.add(elem);
        }
    }

    // Now retrieve the N most popular haplotypes
    if (consensusHaplotypeQueue.size() > 0) {
        // The consensus haplotypes are in a quality-ordered priority queue, so the best
        // haplotypes are just the ones at the front of the queue
        final Haplotype haplotype1 = consensusHaplotypeQueue.poll();

        List<Haplotype> hlist = new ArrayList<Haplotype>();
        hlist.add(new Haplotype(haplotype1.getBases(), 60));

        for (int k = 1; k < haplotypesToCompute; k++) {
            Haplotype haplotype2 = consensusHaplotypeQueue.poll();
            if (haplotype2 == null) {
                haplotype2 = haplotype1; // Sometimes only the reference haplotype can be found
            }
            hlist.add(new Haplotype(haplotype2.getBases(), 20));
        }

        return hlist;
    } else
        return null;
}
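// Simplified top-N admission policy for comparison (added for illustration; not
// part of the original source, and deliberately simpler: the method above
// replaces the *last checked* haplotype rather than the strict minimum). With a
// min-heap, a full queue admits a new score only by evicting the current worst.
private static void addBounded(final PriorityQueue<Integer> queue, final int candidate, final int maxSize) {
    if (queue.size() < maxSize) {
        queue.add(candidate);
    } else if (!queue.isEmpty() && candidate > queue.peek()) {
        queue.poll();         // evict the current minimum
        queue.add(candidate); // admit the higher-scoring candidate
    }
}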
/**
 * Main entry function to calculate genotypes of a given VC with corresponding GLs
 *
 * @param tracker                      Tracker
 * @param refContext                   Reference context
 * @param rawContext                   Raw context
 * @param stratifiedContexts           Stratified alignment contexts
 * @param vc                           Input VC
 * @param model                        GL calculation model
 * @param inheritAttributesFromInputVC Output VC will contain attributes inherited from input vc
 * @param perReadAlleleLikelihoodMap   Per-read allele likelihoods, by sample
 * @return VC with assigned genotypes
 */
public VariantCallContext calculateGenotypes(
        final RefMetaDataTracker tracker,
        final ReferenceContext refContext,
        final AlignmentContext rawContext,
        Map<String, AlignmentContext> stratifiedContexts,
        final VariantContext vc,
        final GenotypeLikelihoodsCalculationModel.Model model,
        final boolean inheritAttributesFromInputVC,
        final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {

    boolean limitedContext =
            tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;

    // initialize the data for this thread if that hasn't been done yet
    if (afcm.get() == null) {
        afcm.set(AFCalcFactory.createAFCalc(UAC, N, logger));
    }

    // estimate our confidence in a reference call and return
    if (vc.getNSamples() == 0) {
        if (limitedContext)
            return null;
        return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES
                ? estimateReferenceConfidence(vc, stratifiedContexts, getTheta(model), false, 1.0)
                : generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
    }

    AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));

    // is the most likely frequency conformation AC=0 for all alternate alleles?
    boolean bestGuessIsRef = true;

    // determine which alternate alleles have AF>0
    final List<Allele> myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
    final List<Integer> alleleCountsofMLE = new ArrayList<Integer>(vc.getAlleles().size());
    myAlleles.add(vc.getReference());
    for (int i = 0; i < AFresult.getAllelesUsedInGenotyping().size(); i++) {
        final Allele alternateAllele = AFresult.getAllelesUsedInGenotyping().get(i);
        if (alternateAllele.isReference())
            continue;

        // we are non-ref if the probability of being non-ref > the emit confidence.
        // the emit confidence is phred-scaled, say 30 => 10^-3.
        // the posterior AF > 0 is log10: -5 => 10^-5
        // we are non-ref if 10^-5 < 10^-3 => -5 < -3
        final boolean isNonRef =
                AFresult.isPolymorphic(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING / -10.0);

        // if the most likely AC is not 0, then this is a good alternate allele to use
        if (isNonRef) {
            myAlleles.add(alternateAllele);
            alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
            bestGuessIsRef = false;
        }
        // if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
        else if (UAC.GenotypingMode
                == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
            myAlleles.add(alternateAllele);
            alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
        }
    }

    final double PoFGT0 = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0());

    // note the Math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice
    final double phredScaledConfidence = Math.abs(
            !bestGuessIsRef
                    || UAC.GenotypingMode
                            == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
                    ? -10 * AFresult.getLog10PosteriorOfAFEq0()
                    : -10 * AFresult.getLog10PosteriorOfAFGT0());

    // return a null call if we don't pass the confidence cutoff or the most likely allele
    // frequency is zero
    if (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES
            && !passesEmitThreshold(phredScaledConfidence, bestGuessIsRef)) {
        // technically, at this point our confidence in a reference call isn't accurately
        // estimated because it didn't take into account samples with no data, so let's get a
        // better estimate
        return limitedContext
                ? null
                : estimateReferenceConfidence(vc, stratifiedContexts, getTheta(model), true, PoFGT0);
    }

    // start constructing the resulting VC
    final GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
    final VariantContextBuilder builder =
            new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
    builder.log10PError(phredScaledConfidence / -10.0);
    if (!passesCallThreshold(phredScaledConfidence))
        builder.filters(filter);

    // create the genotypes
    final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true, ploidy);
    builder.genotypes(genotypes);

    // print out stats if we have a writer
    if (verboseWriter != null && !limitedContext)
        printVerboseData(refContext.getLocus().toString(), vc, PoFGT0, phredScaledConfidence, model);

    // *** note that calculating strand bias involves overwriting data structures, so we do that
    // last
    final HashMap<String, Object> attributes = new HashMap<String, Object>();

    // inherit attributes from input vc if requested
    if (inheritAttributesFromInputVC)
        attributes.putAll(vc.getAttributes());

    // if the site was downsampled, record that fact
    if (!limitedContext && rawContext.hasPileupBeenDownsampled())
        attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);

    if (UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED)
        attributes.put(NUMBER_OF_DISCOVERED_ALLELES_KEY, vc.getAlternateAlleles().size());

    // add the MLE AC and AF annotations
    if (alleleCountsofMLE.size() > 0) {
        attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE);
        final int AN = builder.make().getCalledChrCount();
        final ArrayList<Double> MLEfrequencies = new ArrayList<Double>(alleleCountsofMLE.size());
        // the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0,
        // the GT is ./. but the exact model may arbitrarily choose an AC>1)
        for (int AC : alleleCountsofMLE)
            MLEfrequencies.add(Math.min(1.0, (double) AC / (double) AN));
        attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
    }

    if (UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef) {
        // final boolean DEBUG_SLOD = false;

        // the overall lod
        // double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
        double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
        // if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);

        List<Allele> allAllelesToUse = builder.make().getAlleles();

        // the forward lod
        VariantContext vcForward = calculateLikelihoods(
                tracker, refContext, stratifiedContexts,
                AlignmentContextUtils.ReadOrientation.FORWARD,
                allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
        AFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model));
        // double[] normalizedLog10Posteriors =
        //         MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
        double forwardLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0();
        double forwardLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
        // if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);

        // the reverse lod
        VariantContext vcReverse = calculateLikelihoods(
                tracker, refContext, stratifiedContexts,
                AlignmentContextUtils.ReadOrientation.REVERSE,
                allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
        AFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model));
        // normalizedLog10Posteriors =
        //         MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
        double reverseLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0();
        double reverseLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
        // if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);

        double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
        double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
        // if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);

        // strand score is max bias between forward and reverse strands
        double strandScore = Math.max(forwardLod, reverseLod);

        // rescale by a factor of 10
        strandScore *= 10.0;
        // logger.debug(String.format("SLOD=%f", strandScore));

        if (!Double.isNaN(strandScore))
            attributes.put("SB", strandScore);
    }

    // finish constructing the resulting VC
    builder.attributes(attributes);
    VariantContext vcCall = builder.make();

    // if we are subsetting alleles (either because there were too many or because some were not
    // polymorphic) then we may need to trim the alleles (because the original VariantContext may
    // have had to pad at the end)
    if (myAlleles.size() != vc.getAlleles().size() && !limitedContext)
        // limitedContext callers need to handle allele trimming on their own to keep their
        // perReadAlleleLikelihoodMap alleles in sync
        vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);

    if (annotationEngine != null && !limitedContext) {
        // limitedContext callers need to handle annotations on their own by calling their own
        // annotationEngine
        // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
        final ReadBackedPileup pileup = rawContext.getBasePileup();
        stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);

        vcCall = annotationEngine.annotateContext(
                tracker, refContext, stratifiedContexts, vcCall, perReadAlleleLikelihoodMap);
    }

    return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PoFGT0));
}
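// Worked example of the phred scaling above (added for illustration; not part
// of the original source): a posterior of 10^-5 for AF=0 has log10 = -5, so the
// emitted confidence is |-10 * -5| = 50; Math.abs also turns the -0.0 produced
// by -10 * 0.0 into 0.0.
private static double examplePhredScaledConfidence(final double log10Posterior) {
    return Math.abs(-10.0 * log10Posterior);
}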
private Map<String, Object> annotateIndel(AlignmentContext stratifiedContext, VariantContext vc) {
    if (!stratifiedContext.hasExtendedEventPileup()) {
        return null;
    }

    ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup();
    if (pileup == null)
        return null;

    int totalDepth = pileup.size();

    Map<String, Object> map = new HashMap<String, Object>();
    map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

    if (totalDepth == 0)
        return map;

    int mq0 = 0; // number of "ref" reads that are actually mq0
    HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
    Allele refAllele = vc.getReference();

    for (Allele allele : vc.getAlternateAlleles()) {
        if (allele.isNoCall()) {
            continue; // this does not look so good, should we die???
        }
        alleleCounts.put(getAlleleRepresentation(allele), 0);
    }

    for (ExtendedEventPileupElement e : pileup.toExtendedIterable()) {
        if (e.getMappingQual() == 0) {
            mq0++;
            continue;
        }
        if (e.isInsertion()) {
            final String b = e.getEventBases();
            if (alleleCounts.containsKey(b)) {
                alleleCounts.put(b, alleleCounts.get(b) + 1);
            }
        } else {
            if (e.isDeletion()) {
                if (e.getEventLength() == refAllele.length()) {
                    // this is indeed the deletion allele recorded in VC
                    final String b = DEL;
                    if (alleleCounts.containsKey(b)) {
                        alleleCounts.put(b, alleleCounts.get(b) + 1);
                    }
                }
                // else {
                //     System.out.print(" deletion of WRONG length found");
                // }
            }
        }
    }

    if (mq0 == totalDepth)
        return map;

    String[] fracs = new String[alleleCounts.size()];
    for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
        fracs[i] = String.format("%.3f",
                ((float) alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))))
                        / (totalDepth - mq0));

    map.put(getKeyNames().get(1), fracs);
    // map.put(getKeyNames().get(0), counts);

    return map;
}
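// Sketch of the indel-counting rule above (added for illustration; not part of
// the original source): an insertion is counted when its inserted bases match
// an alt allele's representation, while a deletion is counted only when its
// length equals the reference allele's length, since deletions are keyed by the
// DEL marker rather than by their bases.
private static boolean matchesIndelEvent(final boolean isInsertion, final String eventBases,
                                         final int eventLength, final String altBases, final int refLength) {
    return isInsertion ? eventBases.equals(altBases) : eventLength == refLength;
}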