private Map<String, Object> calculateIC(final VariantContext vc) { final GenotypesContext genotypes = (founderIds == null || founderIds.isEmpty()) ? vc.getGenotypes() : vc.getGenotypes(founderIds); if (genotypes == null || genotypes.size() < MIN_SAMPLES) return null; int idxAA = 0, idxAB = 1, idxBB = 2; if (!vc.isBiallelic()) { // for non-bliallelic case, do test with most common alt allele. // Get then corresponding indeces in GL vectors to retrieve GL of AA,AB and BB. int[] idxVector = vc.getGLIndecesOfAlternateAllele(vc.getAltAlleleWithHighestAlleleCount()); idxAA = idxVector[0]; idxAB = idxVector[1]; idxBB = idxVector[2]; } double refCount = 0.0; double hetCount = 0.0; double homCount = 0.0; int N = 0; // number of samples that have likelihoods for (final Genotype g : genotypes) { if (g.isNoCall() || !g.hasLikelihoods()) continue; if (g.getPloidy() != 2) // only work for diploid samples continue; N++; final double[] normalizedLikelihoods = MathUtils.normalizeFromLog10(g.getLikelihoods().getAsVector()); refCount += normalizedLikelihoods[idxAA]; hetCount += normalizedLikelihoods[idxAB]; homCount += normalizedLikelihoods[idxBB]; } if (N < MIN_SAMPLES) { return null; } final double p = (2.0 * refCount + hetCount) / (2.0 * (refCount + hetCount + homCount)); // expected reference allele frequency final double q = 1.0 - p; // expected alternative allele frequency final double F = 1.0 - (hetCount / (2.0 * p * q * (double) N)); // inbreeding coefficient Map<String, Object> map = new HashMap<String, Object>(); map.put(getKeyNames().get(0), String.format("%.4f", F)); return map; }
/** * Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, * AC, AF). * * @param vc the VariantContext record to subset * @param samples the samples to extract * @return the subsetted VariantContext */ private VariantContext subsetRecord(VariantContext vc, Set<String> samples) { if (samples == null || samples.isEmpty()) return vc; ArrayList<Genotype> genotypes = new ArrayList<Genotype>(); for (Map.Entry<String, Genotype> genotypePair : vc.getGenotypes().entrySet()) { if (samples.contains(genotypePair.getKey())) genotypes.add(genotypePair.getValue()); } VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles()); // if we have fewer alternate alleles in the selected VC than in the original VC, we need to // strip out the GL/PLs (because they are no longer accurate) if (vc.getAlleles().size() != sub.getAlleles().size()) sub = VariantContext.modifyGenotypes(sub, VariantContextUtils.stripPLs(vc.getGenotypes())); HashMap<String, Object> attributes = new HashMap<String, Object>(sub.getAttributes()); int depth = 0; for (String sample : sub.getSampleNames()) { Genotype g = sub.getGenotype(sample); if (g.isNotFiltered() && g.isCalled()) { String dp = (String) g.getAttribute("DP"); if (dp != null && !dp.equals(VCFConstants.MISSING_DEPTH_v3) && !dp.equals(VCFConstants.MISSING_VALUE_v4)) { depth += Integer.valueOf(dp); } } } if (KEEP_ORIGINAL_CHR_COUNTS) { if (attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY)) attributes.put("AC_Orig", attributes.get(VCFConstants.ALLELE_COUNT_KEY)); if (attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY)) attributes.put("AF_Orig", attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY)); if (attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY)) attributes.put("AN_Orig", attributes.get(VCFConstants.ALLELE_NUMBER_KEY)); } VariantContextUtils.calculateChromosomeCounts(sub, attributes, false); attributes.put("DP", depth); sub = VariantContext.modifyAttributes(sub, attributes); return sub; }
private static List<String> calcVCFGenotypeKeys(VariantContext vc) { Set<String> keys = new HashSet<String>(); boolean sawGoodGT = false; boolean sawGoodQual = false; boolean sawGenotypeFilter = false; for (Genotype g : vc.getGenotypes().values()) { keys.addAll(g.getAttributes().keySet()); if (g.isAvailable()) sawGoodGT = true; if (g.hasNegLog10PError()) sawGoodQual = true; if (g.isFiltered() && g.isCalled()) sawGenotypeFilter = true; } if (sawGoodQual) keys.add(VCFConstants.GENOTYPE_QUALITY_KEY); if (sawGenotypeFilter) keys.add(VCFConstants.GENOTYPE_FILTER_KEY); List<String> sortedList = ParsingUtils.sortList(new ArrayList<String>(keys)); // make sure the GT is first if (sawGoodGT) { List<String> newList = new ArrayList<String>(sortedList.size() + 1); newList.add(VCFConstants.GENOTYPE_KEY); newList.addAll(sortedList); sortedList = newList; } return sortedList; }
/** * Checks if vc has a variant call for (at least one of) the samples. * * @param vc the variant rod VariantContext. Here, the variant is the dataset you're looking for * discordances to (e.g. HapMap) * @param compVCs the comparison VariantContext (discordance * @return */ private boolean isDiscordant(VariantContext vc, Collection<VariantContext> compVCs) { if (vc == null) return false; // if we're not looking at specific samples then the absence of a compVC means discordance if (NO_SAMPLES_SPECIFIED) return (compVCs == null || compVCs.isEmpty()); // check if we find it in the variant rod Map<String, Genotype> genotypes = vc.getGenotypes(samples); for (Genotype g : genotypes.values()) { if (sampleHasVariant(g)) { // There is a variant called (or filtered with not exclude filtered option set) that is not // HomRef for at least one of the samples. if (compVCs == null) return true; // Look for this sample in the all vcs of the comp ROD track. boolean foundVariant = false; for (VariantContext compVC : compVCs) { if (sampleHasVariant(compVC.getGenotype(g.getSampleName()))) { foundVariant = true; break; } } // if (at least one sample) was not found in all VCs of the comp ROD, we have discordance if (!foundVariant) return true; } } return false; // we only get here if all samples have a variant in the comp rod. }
static boolean allGenotypesAreUnfilteredAndCalled(VariantContext vc) { for (final Genotype gt : vc.getGenotypes()) { if (gt.isNoCall() || gt.isFiltered()) return false; } return true; }
/** * Returns a context identical to this with the REF and ALT alleles reverse complemented. * * @param vc variant context * @return new vc */ public static VariantContext reverseComplement(VariantContext vc) { // create a mapping from original allele to reverse complemented allele HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size()); for (Allele originalAllele : vc.getAlleles()) { Allele newAllele; if (originalAllele.isNoCall() || originalAllele.isNull()) newAllele = originalAllele; else newAllele = Allele.create( BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference()); alleleMap.put(originalAllele, newAllele); } // create new Genotype objects GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples()); for (final Genotype genotype : vc.getGenotypes()) { List<Allele> newAlleles = new ArrayList<Allele>(); for (Allele allele : genotype.getAlleles()) { Allele newAllele = alleleMap.get(allele); if (newAllele == null) newAllele = Allele.NO_CALL; newAlleles.add(newAllele); } newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles)); } return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make(); }
protected final void printCallInfo( final VariantContext vc, final double[] log10AlleleFrequencyPriors, final long runtimeNano, final AFCalcResult result) { printCallElement(vc, "type", "ignore", vc.getType()); int allelei = 0; for (final Allele a : vc.getAlleles()) printCallElement(vc, "allele", allelei++, a.getDisplayString()); for (final Genotype g : vc.getGenotypes()) printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString()); for (int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++) printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]); printCallElement(vc, "runtime.nano", "ignore", runtimeNano); printCallElement(vc, "log10PosteriorOfAFEq0", "ignore", result.getLog10PosteriorOfAFEq0()); printCallElement(vc, "log10PosteriorOfAFGt0", "ignore", result.getLog10PosteriorOfAFGT0()); for (final Allele allele : result.getAllelesUsedInGenotyping()) { if (allele.isNonReference()) { printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele)); printCallElement( vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele)); } } callReport.flush(); }
private static void mergeGenotypes( GenotypesContext mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) { for (Genotype g : oneVC.getGenotypes()) { String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples); if (!mergedGenotypes.containsSample(name)) { // only add if the name is new Genotype newG = g; if (uniqifySamples || alleleMapping.needsRemapping()) { final List<Allele> alleles = alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles(); newG = new Genotype( name, alleles, g.getLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased()); } mergedGenotypes.add(newG); } } }
public Map<String, Object> annotate( RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here return null; if (!vc.isSNP() && !vc.isIndel() && !vc.isMixed()) return null; final AlignmentContext context = AlignmentContextUtils.joinContexts(stratifiedContexts.values()); final int contextWingSize = Math.min((ref.getWindow().size() - 1) / 2, MIN_CONTEXT_WING_SIZE); final int contextSize = contextWingSize * 2 + 1; final int locus = ref.getLocus().getStart() + (ref.getLocus().getStop() - ref.getLocus().getStart()) / 2; final ReadBackedPileup pileup = context.getBasePileup(); // Compute all haplotypes consistent with the current read pileup final List<Haplotype> haplotypes = computeHaplotypes(pileup, contextSize, locus, vc); final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage(); if (haplotypes != null) { for (final Genotype genotype : vc.getGenotypes()) { final AlignmentContext thisContext = stratifiedContexts.get(genotype.getSampleName()); if (thisContext != null) { final ReadBackedPileup thisPileup = thisContext.getBasePileup(); if (vc.isSNP()) scoreRA.add( scoreReadsAgainstHaplotypes( haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can // be negative and the RMS doesn't make sense else if (vc.isIndel() || vc.isMixed()) { Double d = scoreIndelsAgainstHaplotypes(thisPileup); if (d == null) return null; scoreRA.add( d); // Taking the simple average of all sample's score since the score can be // negative and the RMS doesn't make sense } } } } // annotate the score in the info field final Map<String, Object> map = new HashMap<String, Object>(); map.put(getKeyNames().get(0), String.format("%.4f", scoreRA.mean())); return map; }
static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) { // Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1: for (final Genotype gt1 : vc1.getGenotypes()) { Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom return false; } return true; }
public static VariantContext purgeUnallowedGenotypeAttributes( VariantContext vc, Set<String> allowedAttributes) { if (allowedAttributes == null) return vc; GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples()); for (final Genotype genotype : vc.getGenotypes()) { Map<String, Object> attrs = new HashMap<String, Object>(); for (Map.Entry<String, Object> attr : genotype.getAttributes().entrySet()) { if (allowedAttributes.contains(attr.getKey())) attrs.put(attr.getKey(), attr.getValue()); } newGenotypes.add(Genotype.modifyAttributes(genotype, attrs)); } return new VariantContextBuilder(vc).genotypes(newGenotypes).make(); }
/** * Copy constructor * * @param other the VariantContext to copy */ protected VariantContext(VariantContext other) { this( other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(), other.getAlleles(), other.getGenotypes(), other.getLog10PError(), other.getFiltersMaybeNull(), other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, NO_VALIDATION); }
static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) { for (final Genotype gt1 : vc1.getGenotypes()) { Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); List<Allele> site1Alleles = gt1.getAlleles(); List<Allele> site2Alleles = gt2.getAlleles(); Iterator<Allele> all2It = site2Alleles.iterator(); for (Allele all1 : site1Alleles) { Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable() if (all1.isNonReference() && all2.isNonReference()) // corresponding alleles are alternate return true; } } return false; }
public void update2( VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (eval == null || (getWalker().ignoreAC0Sites() && eval.isMonomorphicInSamples())) return; final Type type = getType(eval); if (type == null) return; TypeSampleMap titvTable = null; // update DP, if possible if (eval.hasAttribute(VCFConstants.DEPTH_KEY)) depthPerSample.inc(type, ALL); // update counts allVariantCounts.inc(type, ALL); // type specific calculations if (type == Type.SNP && eval.isBiallelic()) { titvTable = VariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample; titvTable.inc(type, ALL); } // novelty calculation if (comp != null || (type == Type.CNV && overlapsKnownCNV(eval))) knownVariantCounts.inc(type, ALL); // per sample metrics for (final Genotype g : eval.getGenotypes()) { if (!g.isNoCall() && !g.isHomRef()) { countsPerSample.inc(type, g.getSampleName()); // update transition / transversion ratio if (titvTable != null) titvTable.inc(type, g.getSampleName()); if (g.hasDP()) depthPerSample.inc(type, g.getSampleName()); } } }
@Test(dataProvider = "mergeGenotypes") public void testMergeGenotypes(MergeGenotypesTest cfg) { final VariantContext merged = VariantContextUtils.simpleMerge( genomeLocParser, cfg.inputs, cfg.priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); // test alleles are equal Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles()); // test genotypes assertGenotypesAreMostlyEqual(merged.getGenotypes(), cfg.expected.getGenotypes()); }
static boolean doubleAllelesSegregatePerfectlyAmongSamples( VariantContext vc1, VariantContext vc2) { // Check that Alleles at vc1 and at vc2 always segregate together in all samples (including // reference): Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>(); Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>(); // Note the segregation of the alleles for the reference genome: allele1ToAllele2.put(vc1.getReference(), vc2.getReference()); allele2ToAllele1.put(vc2.getReference(), vc1.getReference()); // Note the segregation of the alleles for each sample (and check that it is consistent with the // reference and all previous samples). for (final Genotype gt1 : vc1.getGenotypes()) { Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); List<Allele> site1Alleles = gt1.getAlleles(); List<Allele> site2Alleles = gt2.getAlleles(); Iterator<Allele> all2It = site2Alleles.iterator(); for (Allele all1 : site1Alleles) { Allele all2 = all2It.next(); Allele all1To2 = allele1ToAllele2.get(all1); if (all1To2 == null) allele1ToAllele2.put(all1, all2); else if (!all1To2.equals(all2)) // all1 segregates with two different alleles at site 2 return false; Allele all2To1 = allele2ToAllele1.get(all2); if (all2To1 == null) allele2ToAllele1.put(all2, all1); else if (!all2To1.equals(all1)) // all2 segregates with two different alleles at site 1 return false; } } return true; }
public static VariantContextBuilder pruneVariantContext( final VariantContextBuilder builder, Collection<String> keysToPreserve) { final VariantContext vc = builder.make(); if (keysToPreserve == null) keysToPreserve = Collections.emptyList(); // VC info final Map<String, Object> attributes = subsetAttributes(vc.commonInfo, keysToPreserve); // Genotypes final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples()); for (final Genotype g : vc.getGenotypes()) { Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve); genotypes.add( new Genotype( g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.getFilters(), genotypeAttributes, g.isPhased())); } return builder.genotypes(genotypes).attributes(attributes); }
static VariantContext reallyMergeIntoMNP( VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) { int startInter = vc1.getEnd() + 1; int endInter = vc2.getStart() - 1; byte[] intermediateBases = null; if (startInter <= endInter) { intermediateBases = referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases(); StringUtil.toUpperCase(intermediateBases); } MergedAllelesData mergeData = new MergedAllelesData( intermediateBases, vc1, vc2); // ensures that the reference allele is added GenotypesContext mergedGenotypes = GenotypesContext.create(); for (final Genotype gt1 : vc1.getGenotypes()) { Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); List<Allele> site1Alleles = gt1.getAlleles(); List<Allele> site2Alleles = gt2.getAlleles(); List<Allele> mergedAllelesForSample = new LinkedList<Allele>(); /* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order as in the input VC records, we preserve phase information (if any) relative to whatever precedes vc1: */ Iterator<Allele> all2It = site2Alleles.iterator(); for (Allele all1 : site1Alleles) { Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable() Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2); mergedAllelesForSample.add(mergedAllele); } double mergedGQ = Math.max(gt1.getLog10PError(), gt2.getLog10PError()); Set<String> mergedGtFilters = new HashSet< String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered Map<String, Object> mergedGtAttribs = new HashMap<String, Object>(); PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2); if (phaseQual.PQ != null) mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ); Genotype mergedGt = new Genotype( gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased); mergedGenotypes.add(mergedGt); } String mergedName = mergeVariantContextNames(vc1.getSource(), vc2.getSource()); double mergedLog10PError = Math.min(vc1.getLog10PError(), vc2.getLog10PError()); Set<String> mergedFilters = new HashSet< String>(); // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered Map<String, Object> mergedAttribs = mergeVariantContextAttributes(vc1, vc2); // ids List<String> mergedIDs = new ArrayList<String>(); if (vc1.hasID()) mergedIDs.add(vc1.getID()); if (vc2.hasID()) mergedIDs.add(vc2.getID()); String mergedID = mergedIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs); VariantContextBuilder mergedBuilder = new VariantContextBuilder( mergedName, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles()) .id(mergedID) .genotypes(mergedGenotypes) .log10PError(mergedLog10PError) .filters(mergedFilters) .attributes(mergedAttribs); VariantContextUtils.calculateChromosomeCounts(mergedBuilder, true); return mergedBuilder.make(); }
public void writeBeagleOutput( VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) { GenomeLoc currentLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), preferredVC); StringBuffer beagleOut = new StringBuffer(); String marker = String.format("%s:%d ", currentLoc.getContig(), currentLoc.getStart()); beagleOut.append(marker); if (markers != null) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t"); for (Allele allele : preferredVC.getAlleles()) { String bglPrintString; if (allele.isNoCall() || allele.isNull()) bglPrintString = "-"; else bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele beagleOut.append(String.format("%s ", bglPrintString)); if (markers != null) markers.append(bglPrintString).append("\t"); } if (markers != null) markers.append("\n"); GenotypesContext preferredGenotypes = preferredVC.getGenotypes(); GenotypesContext otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null; for (String sample : samples) { boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE; Genotype genotype; boolean isValidation; // use sample as key into genotypes structure if (preferredGenotypes.containsSample(sample)) { genotype = preferredGenotypes.get(sample); isValidation = isValidationSite; } else if (otherGenotypes != null && otherGenotypes.containsSample(sample)) { genotype = otherGenotypes.get(sample); isValidation = !isValidationSite; } else { // there is magically no genotype for this sample. throw new StingException( "Sample " + sample + " arose with no genotype in variant or validation VCF. This should never happen."); } /* * Use likelihoods if: is validation, prior is negative; or: is not validation, has genotype key */ double[] log10Likelihoods = null; if ((isValidation && prior < 0.0) || genotype.hasLikelihoods()) { log10Likelihoods = genotype.getLikelihoods().getAsVector(); // see if we need to randomly mask out genotype in this position. if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() <= insertedNoCallRate) { // we are masking out this genotype log10Likelihoods = isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS; } if (isMaleOnChrX) { log10Likelihoods[1] = -255; // todo -- warning this is dangerous for multi-allele case } } /** otherwise, use the prior uniformly */ else if (!isValidation && genotype.isCalled() && !genotype.hasLikelihoods()) { // hack to deal with input VCFs with no genotype likelihoods. Just assume the called // genotype // is confident. This is useful for Hapmap and 1KG release VCFs. double AA = (1.0 - prior) / 2.0; double AB = (1.0 - prior) / 2.0; double BB = (1.0 - prior) / 2.0; if (genotype.isHomRef()) { AA = prior; } else if (genotype.isHet()) { AB = prior; } else if (genotype.isHomVar()) { BB = prior; } log10Likelihoods = MathUtils.toLog10(new double[] {AA, isMaleOnChrX ? 0.0 : AB, BB}); } else { log10Likelihoods = isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS; } writeSampleLikelihoods(beagleOut, preferredVC, log10Likelihoods); } beagleWriter.println(beagleOut.toString()); }
public static VariantContext createVariantContextWithPaddedAlleles( VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) { // see if we need to pad common reference base from all alleles boolean padVC; // We need to pad a VC with a common base if the length of the reference allele is less than the // length of the VariantContext. // This happens because the position of e.g. an indel is always one before the actual event (as // per VCF convention). long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1; if (inputVC.hasSymbolicAlleles()) padVC = true; else if (inputVC.getReference().length() == locLength) padVC = false; else if (inputVC.getReference().length() == locLength - 1) padVC = true; else throw new IllegalArgumentException( "Badly formed variant context at location " + String.valueOf(inputVC.getStart()) + " in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size"); // nothing to do if we don't need to pad bases if (padVC) { if (!inputVC.hasReferenceBaseForIndel()) throw new ReviewedStingException( "Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available."); Byte refByte = inputVC.getReferenceBaseForIndel(); List<Allele> alleles = new ArrayList<Allele>(); for (Allele a : inputVC.getAlleles()) { // get bases for current allele and create a new one with trimmed bases if (a.isSymbolic()) { alleles.add(a); } else { String newBases; if (refBaseShouldBeAppliedToEndOfAlleles) newBases = a.getBaseString() + new String(new byte[] {refByte}); else newBases = new String(new byte[] {refByte}) + a.getBaseString(); alleles.add(Allele.create(newBases, a.isReference())); } } // now we can recreate new genotypes with trimmed alleles GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples()); for (final Genotype g : inputVC.getGenotypes()) { List<Allele> inAlleles = g.getAlleles(); List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size()); for (Allele a : inAlleles) { if (a.isCalled()) { if (a.isSymbolic()) { newGenotypeAlleles.add(a); } else { String newBases; if (refBaseShouldBeAppliedToEndOfAlleles) newBases = a.getBaseString() + new String(new byte[] {refByte}); else newBases = new String(new byte[] {refByte}) + a.getBaseString(); newGenotypeAlleles.add(Allele.create(newBases, a.isReference())); } } else { // add no-call allele newGenotypeAlleles.add(Allele.NO_CALL); } } genotypes.add( new Genotype( g.getSampleName(), newGenotypeAlleles, g.getLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased())); } return new VariantContextBuilder(inputVC).alleles(alleles).genotypes(genotypes).make(); } else return inputVC; }
public String update1( VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { nCalledLoci++; // Note from Eric: // This is really not correct. What we really want here is a polymorphic vs. monomorphic count // (i.e. on the Genotypes). // So in order to maintain consistency with the previous implementation (and the intention of // the original author), I've // added in a proxy check for monomorphic status here. // Protect against case when vc only as no-calls too - can happen if we strafity by sample and // sample as a single no-call. if (vc1.isMonomorphicInSamples()) { nRefLoci++; } else { switch (vc1.getType()) { case NO_VARIATION: // shouldn't get here break; case SNP: nVariantLoci++; nSNPs++; if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++; break; case MNP: nVariantLoci++; nMNPs++; if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++; break; case INDEL: nVariantLoci++; if (vc1.isSimpleInsertion()) nInsertions++; else if (vc1.isSimpleDeletion()) nDeletions++; else nComplex++; break; case MIXED: nVariantLoci++; nMixed++; break; case SYMBOLIC: nSymbolic++; break; default: throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType()); } } String refStr = vc1.getReference().getBaseString().toUpperCase(); String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase() : null; // if (aaStr.equals(".")) { // aaStr = refStr; // } // ref aa alt class // A C A der homozygote // A C C anc homozygote // A A A ref homozygote // A A C // A C A // A C C for (final Genotype g : vc1.getGenotypes()) { final String altStr = vc1.getAlternateAlleles().size() > 0 ? vc1.getAlternateAllele(0).getBaseString().toUpperCase() : null; switch (g.getType()) { case NO_CALL: nNoCalls++; break; case HOM_REF: nHomRef++; if (aaStr != null && altStr != null && !refStr.equalsIgnoreCase(aaStr)) { nHomDerived++; } break; case HET: nHets++; break; case HOM_VAR: nHomVar++; if (aaStr != null && altStr != null && !altStr.equalsIgnoreCase(aaStr)) { nHomDerived++; } break; case MIXED: break; default: throw new ReviewedStingException("BUG: Unexpected genotype type: " + g); } } return null; // we don't capture any interesting sites }
public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) { // see if we need to trim common reference base from all alleles boolean trimVC; // We need to trim common reference base from all alleles in all genotypes if a ref base is // common to all alleles Allele refAllele = inputVC.getReference(); if (!inputVC.isVariant()) trimVC = false; else if (refAllele.isNull()) trimVC = false; else { trimVC = (AbstractVCFCodec.computeForwardClipping( new ArrayList<Allele>(inputVC.getAlternateAlleles()), inputVC.getReference().getDisplayString()) > 0); } // nothing to do if we don't need to trim bases if (trimVC) { List<Allele> alleles = new ArrayList<Allele>(); GenotypesContext genotypes = GenotypesContext.create(); // set the reference base for indels in the attributes Map<String, Object> attributes = new TreeMap<String, Object>(inputVC.getAttributes()); Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>(); for (Allele a : inputVC.getAlleles()) { if (a.isSymbolic()) { alleles.add(a); originalToTrimmedAlleleMap.put(a, a); } else { // get bases for current allele and create a new one with trimmed bases byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length()); Allele trimmedAllele = Allele.create(newBases, a.isReference()); alleles.add(trimmedAllele); originalToTrimmedAlleleMap.put(a, trimmedAllele); } } // detect case where we're trimming bases but resulting vc doesn't have any null allele. In // that case, we keep original representation // example: mixed records such as {TA*,TGA,TG} boolean hasNullAlleles = false; for (Allele a : originalToTrimmedAlleleMap.values()) { if (a.isNull()) hasNullAlleles = true; if (a.isReference()) refAllele = a; } if (!hasNullAlleles) return inputVC; // now we can recreate new genotypes with trimmed alleles for (final Genotype genotype : inputVC.getGenotypes()) { List<Allele> originalAlleles = genotype.getAlleles(); List<Allele> trimmedAlleles = new ArrayList<Allele>(); for (Allele a : originalAlleles) { if (a.isCalled()) trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); else trimmedAlleles.add(Allele.NO_CALL); } genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles)); } final VariantContextBuilder builder = new VariantContextBuilder(inputVC); return builder .alleles(alleles) .genotypes(genotypes) .attributes(attributes) .referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])) .make(); } return inputVC; }