/** * Returns a context identical to this with the REF and ALT alleles reverse complemented. * * @param vc variant context * @return new vc */ public static VariantContext reverseComplement(VariantContext vc) { // create a mapping from original allele to reverse complemented allele HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size()); for (Allele originalAllele : vc.getAlleles()) { Allele newAllele; if (originalAllele.isNoCall() || originalAllele.isNull()) newAllele = originalAllele; else newAllele = Allele.create( BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference()); alleleMap.put(originalAllele, newAllele); } // create new Genotype objects GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples()); for (final Genotype genotype : vc.getGenotypes()) { List<Allele> newAlleles = new ArrayList<Allele>(); for (Allele allele : genotype.getAlleles()) { Allele newAllele = alleleMap.get(allele); if (newAllele == null) newAllele = Allele.NO_CALL; newAlleles.add(newAllele); } newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles)); } return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make(); }
private static void mergeGenotypes( GenotypesContext mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) { for (Genotype g : oneVC.getGenotypes()) { String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples); if (!mergedGenotypes.containsSample(name)) { // only add if the name is new Genotype newG = g; if (uniqifySamples || alleleMapping.needsRemapping()) { final List<Allele> alleles = alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles(); newG = new Genotype( name, alleles, g.getLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased()); } mergedGenotypes.add(newG); } } }
public static GenotypesContext stripPLs(GenotypesContext genotypes) { GenotypesContext newGs = GenotypesContext.create(genotypes.size()); for (final Genotype g : genotypes) { newGs.add(g.hasLikelihoods() ? removePLs(g) : g); } return newGs; }
@Test public void testFixReverseComplementedGenotypes() { final Allele refA = Allele.create("A", true); final Allele altC = Allele.create("C", false); final GenotypesContext originalGenotypes = GenotypesContext.create(3); originalGenotypes.add(new GenotypeBuilder("homref").alleles(Arrays.asList(refA, refA)).make()); originalGenotypes.add(new GenotypeBuilder("het").alleles(Arrays.asList(refA, altC)).make()); originalGenotypes.add(new GenotypeBuilder("homvar").alleles(Arrays.asList(altC, altC)).make()); final Allele refT = Allele.create("T", true); final Allele altG = Allele.create("G", false); final GenotypesContext expectedGenotypes = GenotypesContext.create(3); expectedGenotypes.add(new GenotypeBuilder("homref").alleles(Arrays.asList(refT, refT)).make()); expectedGenotypes.add(new GenotypeBuilder("het").alleles(Arrays.asList(refT, altG)).make()); expectedGenotypes.add(new GenotypeBuilder("homvar").alleles(Arrays.asList(altG, altG)).make()); final Map<Allele, Allele> reverseComplementAlleleMap = new HashMap<Allele, Allele>(2); reverseComplementAlleleMap.put(refA, refT); reverseComplementAlleleMap.put(altC, altG); final GenotypesContext actualGenotypes = LiftoverVcf.fixGenotypes(originalGenotypes, reverseComplementAlleleMap); for (final String sample : Arrays.asList("homref", "het", "homvar")) { final List<Allele> expected = expectedGenotypes.get(sample).getAlleles(); final List<Allele> actual = actualGenotypes.get(sample).getAlleles(); Assert.assertEquals(expected.get(0), actual.get(0)); Assert.assertEquals(expected.get(1), actual.get(1)); } }
public static VariantContext purgeUnallowedGenotypeAttributes( VariantContext vc, Set<String> allowedAttributes) { if (allowedAttributes == null) return vc; GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples()); for (final Genotype genotype : vc.getGenotypes()) { Map<String, Object> attrs = new HashMap<String, Object>(); for (Map.Entry<String, Object> attr : genotype.getAttributes().entrySet()) { if (allowedAttributes.contains(attr.getKey())) attrs.put(attr.getKey(), attr.getValue()); } newGenotypes.add(Genotype.modifyAttributes(genotype, attrs)); } return new VariantContextBuilder(vc).genotypes(newGenotypes).make(); }
public static VariantContextBuilder pruneVariantContext( final VariantContextBuilder builder, Collection<String> keysToPreserve) { final VariantContext vc = builder.make(); if (keysToPreserve == null) keysToPreserve = Collections.emptyList(); // VC info final Map<String, Object> attributes = subsetAttributes(vc.commonInfo, keysToPreserve); // Genotypes final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples()); for (final Genotype g : vc.getGenotypes()) { Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve); genotypes.add( new Genotype( g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.getFilters(), genotypeAttributes, g.isPhased())); } return builder.genotypes(genotypes).attributes(attributes); }
public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) { // see if we need to trim common reference base from all alleles boolean trimVC; // We need to trim common reference base from all alleles in all genotypes if a ref base is // common to all alleles Allele refAllele = inputVC.getReference(); if (!inputVC.isVariant()) trimVC = false; else if (refAllele.isNull()) trimVC = false; else { trimVC = (AbstractVCFCodec.computeForwardClipping( new ArrayList<Allele>(inputVC.getAlternateAlleles()), inputVC.getReference().getDisplayString()) > 0); } // nothing to do if we don't need to trim bases if (trimVC) { List<Allele> alleles = new ArrayList<Allele>(); GenotypesContext genotypes = GenotypesContext.create(); // set the reference base for indels in the attributes Map<String, Object> attributes = new TreeMap<String, Object>(inputVC.getAttributes()); Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>(); for (Allele a : inputVC.getAlleles()) { if (a.isSymbolic()) { alleles.add(a); originalToTrimmedAlleleMap.put(a, a); } else { // get bases for current allele and create a new one with trimmed bases byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length()); Allele trimmedAllele = Allele.create(newBases, a.isReference()); alleles.add(trimmedAllele); originalToTrimmedAlleleMap.put(a, trimmedAllele); } } // detect case where we're trimming bases but resulting vc doesn't have any null allele. In // that case, we keep original representation // example: mixed records such as {TA*,TGA,TG} boolean hasNullAlleles = false; for (Allele a : originalToTrimmedAlleleMap.values()) { if (a.isNull()) hasNullAlleles = true; if (a.isReference()) refAllele = a; } if (!hasNullAlleles) return inputVC; // now we can recreate new genotypes with trimmed alleles for (final Genotype genotype : inputVC.getGenotypes()) { List<Allele> originalAlleles = genotype.getAlleles(); List<Allele> trimmedAlleles = new ArrayList<Allele>(); for (Allele a : originalAlleles) { if (a.isCalled()) trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); else trimmedAlleles.add(Allele.NO_CALL); } genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles)); } final VariantContextBuilder builder = new VariantContextBuilder(inputVC); return builder .alleles(alleles) .genotypes(genotypes) .attributes(attributes) .referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])) .make(); } return inputVC; }
public static VariantContext createVariantContextWithPaddedAlleles( VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) { // see if we need to pad common reference base from all alleles boolean padVC; // We need to pad a VC with a common base if the length of the reference allele is less than the // length of the VariantContext. // This happens because the position of e.g. an indel is always one before the actual event (as // per VCF convention). long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1; if (inputVC.hasSymbolicAlleles()) padVC = true; else if (inputVC.getReference().length() == locLength) padVC = false; else if (inputVC.getReference().length() == locLength - 1) padVC = true; else throw new IllegalArgumentException( "Badly formed variant context at location " + String.valueOf(inputVC.getStart()) + " in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size"); // nothing to do if we don't need to pad bases if (padVC) { if (!inputVC.hasReferenceBaseForIndel()) throw new ReviewedStingException( "Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available."); Byte refByte = inputVC.getReferenceBaseForIndel(); List<Allele> alleles = new ArrayList<Allele>(); for (Allele a : inputVC.getAlleles()) { // get bases for current allele and create a new one with trimmed bases if (a.isSymbolic()) { alleles.add(a); } else { String newBases; if (refBaseShouldBeAppliedToEndOfAlleles) newBases = a.getBaseString() + new String(new byte[] {refByte}); else newBases = new String(new byte[] {refByte}) + a.getBaseString(); alleles.add(Allele.create(newBases, a.isReference())); } } // now we can recreate new genotypes with trimmed alleles GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples()); for (final Genotype g : inputVC.getGenotypes()) { List<Allele> inAlleles = g.getAlleles(); List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size()); for (Allele a : inAlleles) { if (a.isCalled()) { if (a.isSymbolic()) { newGenotypeAlleles.add(a); } else { String newBases; if (refBaseShouldBeAppliedToEndOfAlleles) newBases = a.getBaseString() + new String(new byte[] {refByte}); else newBases = new String(new byte[] {refByte}) + a.getBaseString(); newGenotypeAlleles.add(Allele.create(newBases, a.isReference())); } } else { // add no-call allele newGenotypeAlleles.add(Allele.NO_CALL); } } genotypes.add( new Genotype( g.getSampleName(), newGenotypeAlleles, g.getLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased())); } return new VariantContextBuilder(inputVC).alleles(alleles).genotypes(genotypes).make(); } else return inputVC; }
static VariantContext reallyMergeIntoMNP( VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) { int startInter = vc1.getEnd() + 1; int endInter = vc2.getStart() - 1; byte[] intermediateBases = null; if (startInter <= endInter) { intermediateBases = referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases(); StringUtil.toUpperCase(intermediateBases); } MergedAllelesData mergeData = new MergedAllelesData( intermediateBases, vc1, vc2); // ensures that the reference allele is added GenotypesContext mergedGenotypes = GenotypesContext.create(); for (final Genotype gt1 : vc1.getGenotypes()) { Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); List<Allele> site1Alleles = gt1.getAlleles(); List<Allele> site2Alleles = gt2.getAlleles(); List<Allele> mergedAllelesForSample = new LinkedList<Allele>(); /* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order as in the input VC records, we preserve phase information (if any) relative to whatever precedes vc1: */ Iterator<Allele> all2It = site2Alleles.iterator(); for (Allele all1 : site1Alleles) { Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable() Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2); mergedAllelesForSample.add(mergedAllele); } double mergedGQ = Math.max(gt1.getLog10PError(), gt2.getLog10PError()); Set<String> mergedGtFilters = new HashSet< String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered Map<String, Object> mergedGtAttribs = new HashMap<String, Object>(); PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2); if (phaseQual.PQ != null) mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ); Genotype mergedGt = new Genotype( gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased); mergedGenotypes.add(mergedGt); } String mergedName = mergeVariantContextNames(vc1.getSource(), vc2.getSource()); double mergedLog10PError = Math.min(vc1.getLog10PError(), vc2.getLog10PError()); Set<String> mergedFilters = new HashSet< String>(); // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered Map<String, Object> mergedAttribs = mergeVariantContextAttributes(vc1, vc2); // ids List<String> mergedIDs = new ArrayList<String>(); if (vc1.hasID()) mergedIDs.add(vc1.getID()); if (vc2.hasID()) mergedIDs.add(vc2.getID()); String mergedID = mergedIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs); VariantContextBuilder mergedBuilder = new VariantContextBuilder( mergedName, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles()) .id(mergedID) .genotypes(mergedGenotypes) .log10PError(mergedLog10PError) .filters(mergedFilters) .attributes(mergedAttribs); VariantContextUtils.calculateChromosomeCounts(mergedBuilder, true); return mergedBuilder.make(); }