public static VariantContext getVCFromAllelesRod( RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) { if (tracker == null || ref == null || logger == null) return null; VariantContext vc = null; // search for usable record for (final VariantContext vc_input : tracker.getValues(allelesBinding, loc)) { if (vc_input != null && !vc_input.isFiltered() && (!requireSNP || vc_input.isSNP())) { if (vc == null) { vc = vc_input; } else { logger.warn( "Multiple valid VCF records detected in the alleles input file at site " + ref.getLocus() + ", only considering the first record"); } } } return vc; }
/**
 * Returns the substitution type of a biallelic SNP, computed from the first base of the
 * reference allele and the first base of the (single) alternate allele.
 *
 * @param context the variant context to classify; must be a biallelic SNP
 * @return the result of {@code BaseUtils.SNPSubstitutionType} for the ref/alt base pair
 * @throws IllegalStateException if {@code context} is not a SNP or is not biallelic
 */
public static BaseUtils.BaseSubstitutionType getSNPSubstitutionType(VariantContext context) {
  if (!context.isSNP() || !context.isBiallelic()) {
    // The message names the actual failed precondition (the old text read "bialleic non-SNP").
    throw new IllegalStateException(
        "Requested SNP substitution type for a context that is not a biallelic SNP: " + context);
  }
  return BaseUtils.SNPSubstitutionType(
      context.getReference().getBases()[0], context.getAlternateAllele(0).getBases()[0]);
}
/**
 * Produces a single-entry annotation describing the kind of variant.
 *
 * <ul>
 *   <li>mixed contexts are labelled {@code MIXED};
 *   <li>multi-allelic indels are labelled {@code MULTIALLELIC_INDEL};
 *   <li>biallelic indels get {@code INS.}, {@code DEL.} or {@code OTHER.} followed by the name of
 *       every classification reported by {@code IndelUtils.findEventClassificationIndex}, each
 *       terminated by a dot;
 *   <li>anything else is not annotated.
 * </ul>
 *
 * @param tracker unused here
 * @param walker unused here
 * @param ref the reference context handed to the indel classifier
 * @param stratifiedContexts unused here
 * @param vc the variant context to describe
 * @param stratifiedPerReadAlleleLikelihoodMap unused here
 * @return a map from the first key name to the type label, or null if {@code vc} is neither
 *     mixed nor an indel
 */
public Map<String, Object> annotate(
    final RefMetaDataTracker tracker,
    final AnnotatorCompatible walker,
    final ReferenceContext ref,
    final Map<String, AlignmentContext> stratifiedContexts,
    final VariantContext vc,
    final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
  final String type;
  if (vc.isMixed()) {
    type = "MIXED";
  } else if (vc.isIndel()) {
    if (!vc.isBiallelic()) {
      type = "MULTIALLELIC_INDEL";
    } else {
      final StringBuilder label = new StringBuilder();
      if (vc.isSimpleInsertion()) {
        label.append("INS.");
      } else if (vc.isSimpleDeletion()) {
        label.append("DEL.");
      } else {
        label.append("OTHER.");
      }
      for (final int k : IndelUtils.findEventClassificationIndex(vc, ref)) {
        label.append(IndelUtils.getIndelClassificationName(k)).append('.');
      }
      type = label.toString();
    }
  } else {
    return null;
  }

  final Map<String, Object> map = new HashMap<String, Object>();
  map.put(getKeyNames().get(0), type);
  return map;
}
/**
 * Merges one variant context per input allele list (named vcf1, vcf2, ...) and checks that the
 * merged record carries exactly the expected alleles.
 */
@Test(dataProvider = "mergeAlleles")
public void testMergeAlleles(MergeAllelesTest cfg) {
  final List<VariantContext> inputs = new ArrayList<VariantContext>();
  int ordinal = 1;
  for (final List<Allele> alleles : cfg.inputs) {
    inputs.add(makeVC("vcf" + ordinal, alleles));
    ordinal++;
  }

  final List<String> priority = vcs2priority(inputs);

  final VariantContext merged =
      VariantContextUtils.simpleMerge(
          genomeLocParser,
          inputs,
          priority,
          VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
          VariantContextUtils.GenotypeMergeType.PRIORITIZE,
          false,
          false,
          "set",
          false,
          false);

  Assert.assertEquals(merged.getAlleles(), cfg.expected);
}
/** * Checks if vc has a variant call for (at least one of) the samples. * * @param vc the variant rod VariantContext. Here, the variant is the dataset you're looking for * discordances to (e.g. HapMap) * @param compVCs the comparison VariantContext (discordance * @return */ private boolean isDiscordant(VariantContext vc, Collection<VariantContext> compVCs) { if (vc == null) return false; // if we're not looking at specific samples then the absence of a compVC means discordance if (NO_SAMPLES_SPECIFIED) return (compVCs == null || compVCs.isEmpty()); // check if we find it in the variant rod Map<String, Genotype> genotypes = vc.getGenotypes(samples); for (Genotype g : genotypes.values()) { if (sampleHasVariant(g)) { // There is a variant called (or filtered with not exclude filtered option set) that is not // HomRef for at least one of the samples. if (compVCs == null) return true; // Look for this sample in the all vcs of the comp ROD track. boolean foundVariant = false; for (VariantContext compVC : compVCs) { if (sampleHasVariant(compVC.getGenotype(g.getSampleName()))) { foundVariant = true; break; } } // if (at least one sample) was not found in all VCs of the comp ROD, we have discordance if (!foundVariant) return true; } } return false; // we only get here if all samples have a variant in the comp rod. }
private boolean isConcordant(VariantContext vc, Collection<VariantContext> compVCs) { if (vc == null || compVCs == null || compVCs.isEmpty()) return false; // if we're not looking for specific samples then the fact that we have both VCs is enough to // call it concordant. if (NO_SAMPLES_SPECIFIED) return true; // make a list of all samples contained in this variant VC that are being tracked by the user // command line arguments. Set<String> variantSamples = vc.getSampleNames(); variantSamples.retainAll(samples); // check if we can find all samples from the variant rod in the comp rod. for (String sample : variantSamples) { boolean foundSample = false; for (VariantContext compVC : compVCs) { Genotype varG = vc.getGenotype(sample); Genotype compG = compVC.getGenotype(sample); if (haveSameGenotypes(varG, compG)) { foundSample = true; break; } } // if at least one sample doesn't have the same genotype, we don't have concordance if (!foundSample) { return false; } } return true; }
/**
 * Writes one report row per piece of information about a call, then flushes the report.
 *
 * <p>Rows are emitted in this order: the variant type, each allele with its index, each sample's
 * likelihoods string, each prior with its index, the runtime, the two posteriors, and finally
 * the MLE allele count and per-allele posterior of every non-reference allele used in
 * genotyping.
 *
 * @param vc the variant context that was called
 * @param log10AlleleFrequencyPriors the priors to report, one row per entry
 * @param runtimeNano the runtime value to report
 * @param result the calculation result to report
 */
protected final void printCallInfo(
    final VariantContext vc,
    final double[] log10AlleleFrequencyPriors,
    final long runtimeNano,
    final AFCalcResult result) {
  printCallElement(vc, "type", "ignore", vc.getType());

  int alleleIndex = 0;
  for (final Allele vcAllele : vc.getAlleles()) {
    printCallElement(vc, "allele", alleleIndex, vcAllele.getDisplayString());
    alleleIndex++;
  }

  for (final Genotype genotype : vc.getGenotypes()) {
    printCallElement(vc, "PL", genotype.getSampleName(), genotype.getLikelihoodsString());
  }

  int priorIndex = 0;
  for (final double prior : log10AlleleFrequencyPriors) {
    printCallElement(vc, "priorI", priorIndex, prior);
    priorIndex++;
  }

  printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
  printCallElement(vc, "log10PosteriorOfAFEq0", "ignore", result.getLog10PosteriorOfAFEq0());
  printCallElement(vc, "log10PosteriorOfAFGt0", "ignore", result.getLog10PosteriorOfAFGT0());

  for (final Allele used : result.getAllelesUsedInGenotyping()) {
    if (!used.isNonReference()) {
      continue;
    }
    printCallElement(vc, "MLE", used, result.getAlleleCountAtMLE(used));
    printCallElement(
        vc, "pNonRefByAllele", used, result.getLog10PosteriorOfAFGt0ForAllele(used));
  }

  callReport.flush();
}
@Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { // First, verify that the metadata tracker is not null (meaning there is a variant at this locus // to process). if (tracker != null) { // Get all of the "VariantContext" objects that span this locus. A VariantContext represents // a line in a VCF file. Collection<VariantContext> vcs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false); // There may be more than one variant at this locus. Process them all. for (VariantContext vc : vcs) { out.println( "Hello, ref=" + vc.getReference() + ",alt=" + vc.getAltAlleleWithHighestAlleleCount() + " at " + vc.getChr() + ":" + vc.getStart()); } // Return 1, indicating that we saw a variant. return 1; } // We saw nothing of interest, so return 0. return 0; }
/**
 * Merges two identical passing records under several set-key names, with and without origin
 * annotation, and checks that the set attribute is the intersection marker when annotation is
 * requested and absent otherwise.
 */
@Test
public void testAnnotationSet() {
  final boolean[] annotateChoices = {true, false};
  final String[] setKeys = {"set", "combine", "x"};

  for (final boolean annotate : annotateChoices) {
    for (final String set : setKeys) {
      final List<String> priority = Arrays.asList("1", "2");
      final VariantContext first =
          makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);
      final VariantContext second =
          makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);

      final VariantContext merged =
          VariantContextUtils.simpleMerge(
              genomeLocParser,
              Arrays.asList(first, second),
              priority,
              VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
              VariantContextUtils.GenotypeMergeType.PRIORITIZE,
              annotate,
              false,
              set,
              false,
              false);

      if (!annotate) {
        Assert.assertFalse(merged.hasAttribute(set));
      } else {
        Assert.assertEquals(merged.getAttribute(set), VariantContextUtils.MERGE_INTERSECTION);
      }
    }
  }
}
@Test(dataProvider = "mergeFiltered") public void testMergeFiltered(MergeFilteredTest cfg) { final List<String> priority = vcs2priority(cfg.inputs); final VariantContext merged = VariantContextUtils.simpleMerge( genomeLocParser, cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); // test alleles are equal Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles()); // test set field Assert.assertEquals(merged.getAttribute("set"), cfg.setExpected); // test filter field Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters()); }
/** * Returns a context identical to this with the REF and ALT alleles reverse complemented. * * @param vc variant context * @return new vc */ public static VariantContext reverseComplement(VariantContext vc) { // create a mapping from original allele to reverse complemented allele HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size()); for (Allele originalAllele : vc.getAlleles()) { Allele newAllele; if (originalAllele.isNoCall() || originalAllele.isNull()) newAllele = originalAllele; else newAllele = Allele.create( BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference()); alleleMap.put(originalAllele, newAllele); } // create new Genotype objects GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples()); for (final Genotype genotype : vc.getGenotypes()) { List<Allele> newAlleles = new ArrayList<Allele>(); for (Allele allele : genotype.getAlleles()) { Allele newAllele = alleleMap.get(allele); if (newAllele == null) newAllele = Allele.NO_CALL; newAlleles.add(newAllele); } newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles)); } return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make(); }
private static void mergeGenotypes( GenotypesContext mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) { for (Genotype g : oneVC.getGenotypes()) { String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples); if (!mergedGenotypes.containsSample(name)) { // only add if the name is new Genotype newG = g; if (uniqifySamples || alleleMapping.needsRemapping()) { final List<Allele> alleles = alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles(); newG = new Genotype( name, alleles, g.getLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased()); } mergedGenotypes.add(newG); } } }
/**
 * Recomputes the chromosome-count attributes for the context the builder currently produces and
 * stores the resulting attribute map back into the builder.
 *
 * <p>The counts are computed on a fresh copy of the attributes of {@code builder.make()}.
 *
 * @param builder the VariantContextBuilder we are updating
 * @param removeStaleValues should we remove stale values from the mapping?
 */
public static void calculateChromosomeCounts(
    VariantContextBuilder builder, boolean removeStaleValues) {
  final VariantContext current = builder.make();
  final Map<String, Object> attributeCopy = new HashMap<String, Object>(current.getAttributes());
  builder.attributes(calculateChromosomeCounts(current, attributeCopy, removeStaleValues));
}
/**
 * Builds a priority list from the sources of the given variant contexts, in iteration order.
 *
 * @param vcs the variant contexts
 * @return a new list holding {@code getSource()} of every element of {@code vcs}
 */
private static final List<String> vcs2priority(final Collection<VariantContext> vcs) {
  final List<String> sources = new ArrayList<String>();
  for (final VariantContext each : vcs) {
    final String source = each.getSource();
    sources.add(source);
  }
  return sources;
}
/**
 * Creates the merged-allele bookkeeping for two variant contexts and registers the merge of
 * their two reference alleles.
 *
 * @param intermediateBases the bases stored as the intermediate bases; may be null, in which
 *     case the intermediate length is 0
 * @param vc1 the first variant context
 * @param vc2 the second variant context
 */
public MergedAllelesData(byte[] intermediateBases, VariantContext vc1, VariantContext vc2) {
  this.intermediateBases = intermediateBases;
  if (intermediateBases == null) {
    this.intermediateLength = 0;
  } else {
    this.intermediateLength = intermediateBases.length;
  }

  // The key type AlleleOneAndTwo implements equals() and hashCode().
  this.mergedAlleles = new HashMap<AlleleOneAndTwo, Allele>();

  this.ensureMergedAllele(vc1.getReference(), vc2.getReference(), true);
}
public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) { // if all alleles of vc1 are a contained in alleles of vc2, return true if (!vc1.getReference().equals(vc2.getReference())) return false; for (Allele a : vc1.getAlternateAlleles()) { if (!vc2.getAlternateAlleles().contains(a)) return false; } return true; }
/**
 * Describes this call by location, alleles, sample count, the original call's posterior and the
 * runtime (the {@code runtime} field divided by 1e9, rendered by {@code AutoFormattingTime}).
 */
@Override
public String toString() {
  final String elapsed = new AutoFormattingTime(runtime / 1e9).toString();
  final String template =
      "ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s";
  return String.format(
      template,
      vc.getChr(),
      vc.getStart(),
      vc.getAlleles(),
      vc.getNSamples(),
      originalCall.getLog10PosteriorOfAFGT0(),
      elapsed);
}
/**
 * Looks up the position of a variant context's source in the priority list.
 *
 * @param vc the variant context whose source is looked up
 * @return the index of {@code vc.getSource()} in {@code priorityListOfVCs}
 * @throws UserException.BadArgumentValue if the source is not in the priority list
 */
private int getIndex(VariantContext vc) {
  final int position = priorityListOfVCs.indexOf(vc.getSource());
  if (position != -1) {
    return position;
  }
  throw new UserException.BadArgumentValue(
      Utils.join(",", priorityListOfVCs),
      "Priority list "
          + priorityListOfVCs
          + " doesn't contain variant context "
          + vc.getSource());
}
static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) { // Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1: for (final Genotype gt1 : vc1.getGenotypes()) { Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom return false; } return true; }
/**
 * Writes one tab-separated row to the call report: the location ({@code chr:start}), the
 * variable, the key and the value.
 *
 * @param vc the variant context providing the location
 * @param variable the second column
 * @param key the third column
 * @param value the fourth column
 */
@Requires({
  "vc != null",
  "variable != null",
  "key != null",
  "value != null",
  "callReport != null"
})
private void printCallElement(
    final VariantContext vc, final Object variable, final Object key, final Object value) {
  final String position = String.format("%s:%d", vc.getChr(), vc.getStart());
  callReport.println(
      Utils.join("\t", Arrays.<Object>asList(position, variable, key, value)));
}
private static void verifyUniqueSampleNames(Collection<VariantContext> unsortedVCs) { Set<String> names = new HashSet<String>(); for (VariantContext vc : unsortedVCs) { for (String name : vc.getSampleNames()) { // System.out.printf("Checking %s %b%n", name, names.contains(name)); if (names.contains(name)) throw new UserException( "REQUIRE_UNIQUE sample names is true but duplicate names were discovered " + name); } names.addAll(vc.getSampleNames()); } }
/** * Update the attributes of the attributes map given the VariantContext to reflect the proper * chromosome-based VCF tags * * @param vc the VariantContext * @param attributes the attributes map to populate; must not be null; may contain old values * @param removeStaleValues should we remove stale values from the mapping? * @return the attributes map provided as input, returned for programming convenience */ public static Map<String, Object> calculateChromosomeCounts( VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) { final int AN = vc.getCalledChrCount(); // if everyone is a no-call, remove the old attributes if requested if (AN == 0 && removeStaleValues) { if (attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY)) attributes.remove(VCFConstants.ALLELE_COUNT_KEY); if (attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY)) attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY); if (attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY)) attributes.remove(VCFConstants.ALLELE_NUMBER_KEY); return attributes; } if (vc.hasGenotypes()) { attributes.put(VCFConstants.ALLELE_NUMBER_KEY, AN); // if there are alternate alleles, record the relevant tags if (vc.getAlternateAlleles().size() > 0) { final ArrayList<String> alleleFreqs = new ArrayList<String>(); final ArrayList<Integer> alleleCounts = new ArrayList<Integer>(); for (Allele allele : vc.getAlternateAlleles()) { int altChromosomes = vc.getCalledChrCount(allele); alleleCounts.add(altChromosomes); if (AN == 0) { alleleFreqs.add("0.0"); } else { // todo -- this is a performance problem final String freq = String.format( makePrecisionFormatStringFromDenominatorValue((double) AN), ((double) altChromosomes / (double) AN)); alleleFreqs.add(freq); } } attributes.put( VCFConstants.ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts); attributes.put( VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? 
alleleFreqs.get(0) : alleleFreqs); } else { attributes.put(VCFConstants.ALLELE_COUNT_KEY, 0); attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0); } } return attributes; }
private Type getType(VariantContext vc) { switch (vc.getType()) { case SNP: return Type.SNP; case INDEL: for (int l : vc.getIndelLengths()) if (Math.abs(l) > MAX_INDEL_LENGTH) return Type.CNV; return Type.INDEL; case SYMBOLIC: return Type.CNV; default: // throw new UserException.BadInput("Unexpected variant context type: " + vc); return null; } }
private Map<String, Object> calculateIC(final VariantContext vc) { final GenotypesContext genotypes = (founderIds == null || founderIds.isEmpty()) ? vc.getGenotypes() : vc.getGenotypes(founderIds); if (genotypes == null || genotypes.size() < MIN_SAMPLES) return null; int idxAA = 0, idxAB = 1, idxBB = 2; if (!vc.isBiallelic()) { // for non-bliallelic case, do test with most common alt allele. // Get then corresponding indeces in GL vectors to retrieve GL of AA,AB and BB. int[] idxVector = vc.getGLIndecesOfAlternateAllele(vc.getAltAlleleWithHighestAlleleCount()); idxAA = idxVector[0]; idxAB = idxVector[1]; idxBB = idxVector[2]; } double refCount = 0.0; double hetCount = 0.0; double homCount = 0.0; int N = 0; // number of samples that have likelihoods for (final Genotype g : genotypes) { if (g.isNoCall() || !g.hasLikelihoods()) continue; if (g.getPloidy() != 2) // only work for diploid samples continue; N++; final double[] normalizedLikelihoods = MathUtils.normalizeFromLog10(g.getLikelihoods().getAsVector()); refCount += normalizedLikelihoods[idxAA]; hetCount += normalizedLikelihoods[idxAB]; homCount += normalizedLikelihoods[idxBB]; } if (N < MIN_SAMPLES) { return null; } final double p = (2.0 * refCount + hetCount) / (2.0 * (refCount + hetCount + homCount)); // expected reference allele frequency final double q = 1.0 - p; // expected alternative allele frequency final double F = 1.0 - (hetCount / (2.0 * p * q * (double) N)); // inbreeding coefficient Map<String, Object> map = new HashMap<String, Object>(); map.put(getKeyNames().get(0), String.format("%.4f", F)); return map; }
/**
 * Annotates a called genotype, dispatching on the kind of variant.
 *
 * @param tracker unused here
 * @param walker unused here
 * @param ref unused here
 * @param stratifiedContext the alignment context handed to the SNP/indel annotator
 * @param vc the variant context being annotated
 * @param g the genotype; null or uncalled genotypes are not annotated
 * @return the result of {@code annotateSNP} for SNPs, of {@code annotateIndel} for indels, and
 *     null for uncalled genotypes or any other kind of variant
 */
public Map<String, Object> annotate(
    RefMetaDataTracker tracker,
    AnnotatorCompatibleWalker walker,
    ReferenceContext ref,
    AlignmentContext stratifiedContext,
    VariantContext vc,
    Genotype g) {
  final boolean called = g != null && g.isCalled();
  if (!called) {
    return null;
  }

  if (vc.isSNP()) {
    return annotateSNP(stratifiedContext, vc);
  } else if (vc.isIndel()) {
    return annotateIndel(stratifiedContext, vc);
  } else {
    return null;
  }
}
/**
 * Returns a copy of the context whose genotypes keep only the allowed attributes.
 *
 * @param vc the variant context to filter
 * @param allowedAttributes the attribute keys to keep; null means no filtering
 * @return {@code vc} itself when {@code allowedAttributes} is null, otherwise a rebuilt context
 *     in which each genotype's attributes are restricted to the allowed keys
 */
public static VariantContext purgeUnallowedGenotypeAttributes(
    VariantContext vc, Set<String> allowedAttributes) {
  if (allowedAttributes == null) {
    return vc;
  }

  final GenotypesContext filteredGenotypes = GenotypesContext.create(vc.getNSamples());
  for (final Genotype genotype : vc.getGenotypes()) {
    final Map<String, Object> kept = new HashMap<String, Object>();
    for (final Map.Entry<String, Object> entry : genotype.getAttributes().entrySet()) {
      if (!allowedAttributes.contains(entry.getKey())) {
        continue;
      }
      kept.put(entry.getKey(), entry.getValue());
    }
    filteredGenotypes.add(Genotype.modifyAttributes(genotype, kept));
  }

  return new VariantContextBuilder(vc).genotypes(filteredGenotypes).make();
}
/**
 * Picks the longest reference allele among the given variant contexts.
 *
 * <p>A strictly longer reference replaces the current pick. A reference of the same length as
 * the current pick must be equal to it.
 *
 * @param VCs the variant contexts to inspect
 * @return the longest reference allele, or null if {@code VCs} is empty
 * @throws UserException.BadInput if two references of the same length differ
 */
private static Allele determineReferenceAllele(List<VariantContext> VCs) {
  Allele longest = null;
  for (final VariantContext vc : VCs) {
    final Allele candidate = vc.getReference();

    if (longest == null || longest.length() < candidate.length()) {
      longest = candidate;
      continue;
    }

    final boolean conflicting =
        longest.length() == candidate.length() && !longest.equals(candidate);
    if (conflicting) {
      throw new UserException.BadInput(
          String.format(
              "The provided variant file(s) have inconsistent references for the same position(s) at %s:%d, %s vs. %s",
              vc.getChr(), vc.getStart(), longest, candidate));
    }
  }
  return longest;
}
/**
 * Copy constructor.
 *
 * <p>Delegates to the full constructor, passing the source, ID, chromosome, start, end, alleles,
 * genotypes, log10 error, filters and attributes read from {@code other}, its
 * {@code REFERENCE_BASE_FOR_INDEL} field, and {@code NO_VALIDATION}.
 *
 * @param other the VariantContext to copy
 */
protected VariantContext(VariantContext other) {
  this(
      other.getSource(),
      other.getID(),
      other.getChr(),
      other.getStart(),
      other.getEnd(),
      other.getAlleles(),
      other.getGenotypes(),
      other.getLog10PError(),
      // uses the "maybe null" accessor, so a null filter set is passed through as-is
      other.getFiltersMaybeNull(),
      other.getAttributes(),
      other.REFERENCE_BASE_FOR_INDEL,
      // NOTE(review): presumably skips re-validating an already-built context — confirm
      NO_VALIDATION);
}
// decide whether we are currently processing SNPs, indels, neither, or both private List<GenotypeLikelihoodsCalculationModel.Model> getGLModelsToUse( final RefMetaDataTracker tracker, final ReferenceContext refContext, final AlignmentContext rawContext) { final List<GenotypeLikelihoodsCalculationModel.Model> models = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(2); String modelPrefix = ""; if (UAC.GLmodel.name().toUpperCase().contains("BOTH")) modelPrefix = UAC.GLmodel.name().toUpperCase().replaceAll("BOTH", ""); if (!UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) modelPrefix = GPSTRING + modelPrefix; // if we're genotyping given alleles and we have a requested SNP at this position, do SNP if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { final VariantContext vcInput = getVCFromAllelesRod( tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles); if (vcInput == null) return models; if (vcInput.isSNP()) { // ignore SNPs if the user chose INDEL mode only if (UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP")) models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix + "SNP")); } else if (vcInput.isIndel() || vcInput.isMixed()) { // ignore INDELs if the user chose SNP mode only if (UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL")) models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix + "INDEL")); } // No support for other types yet } else { if (UAC.GLmodel.name().toUpperCase().contains("BOTH")) { models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix + "SNP")); models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix + "INDEL")); } else { models.add( GenotypeLikelihoodsCalculationModel.Model.valueOf( modelPrefix + UAC.GLmodel.name().toUpperCase())); } } return 
models; }
private static List<String> calcVCFGenotypeKeys(VariantContext vc) { Set<String> keys = new HashSet<String>(); boolean sawGoodGT = false; boolean sawGoodQual = false; boolean sawGenotypeFilter = false; for (Genotype g : vc.getGenotypes().values()) { keys.addAll(g.getAttributes().keySet()); if (g.isAvailable()) sawGoodGT = true; if (g.hasNegLog10PError()) sawGoodQual = true; if (g.isFiltered() && g.isCalled()) sawGenotypeFilter = true; } if (sawGoodQual) keys.add(VCFConstants.GENOTYPE_QUALITY_KEY); if (sawGenotypeFilter) keys.add(VCFConstants.GENOTYPE_FILTER_KEY); List<String> sortedList = ParsingUtils.sortList(new ArrayList<String>(keys)); // make sure the GT is first if (sawGoodGT) { List<String> newList = new ArrayList<String>(sortedList.size() + 1); newList.add(VCFConstants.GENOTYPE_KEY); newList.addAll(sortedList); sortedList = newList; } return sortedList; }