private static void mergeGenotypes( GenotypesContext mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) { for (Genotype g : oneVC.getGenotypes()) { String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples); if (!mergedGenotypes.containsSample(name)) { // only add if the name is new Genotype newG = g; if (uniqifySamples || alleleMapping.needsRemapping()) { final List<Allele> alleles = alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles(); newG = new Genotype( name, alleles, g.getLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased()); } mergedGenotypes.add(newG); } } }
// necessary to not overload equals for genotypes private void assertGenotypesAreMostlyEqual(GenotypesContext actual, GenotypesContext expected) { if (actual == expected) { return; } if (actual == null || expected == null) { Assert.fail("Maps not equal: expected: " + expected + " and actual: " + actual); } if (actual.size() != expected.size()) { Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size()); } for (Genotype value : actual) { Genotype expectedValue = expected.get(value.getSampleName()); Assert.assertEquals( value.getAlleles(), expectedValue.getAlleles(), "Alleles in Genotype aren't equal"); Assert.assertEquals(value.getGQ(), expectedValue.getGQ(), "GQ values aren't equal"); Assert.assertEquals( value.hasLikelihoods(), expectedValue.hasLikelihoods(), "Either both have likelihoods or both not"); if (value.hasLikelihoods()) Assert.assertEquals( value.getLikelihoods().getAsVector(), expectedValue.getLikelihoods().getAsVector(), "Genotype likelihoods aren't equal"); } }
public void validateAlternateAlleles() { if (!hasGenotypes()) return; List<Allele> reportedAlleles = getAlleles(); Set<Allele> observedAlleles = new HashSet<Allele>(); observedAlleles.add(getReference()); for (final Genotype g : getGenotypes()) { if (g.isCalled()) observedAlleles.addAll(g.getAlleles()); } if (reportedAlleles.size() != observedAlleles.size()) throw new TribbleException.InternalCodecException( String.format( "the ALT allele(s) for the record at position %s:%d do not match what is observed in the per-sample genotypes", getChr(), getStart())); int originalSize = reportedAlleles.size(); // take the intersection and see if things change observedAlleles.retainAll(reportedAlleles); if (observedAlleles.size() != originalSize) throw new TribbleException.InternalCodecException( String.format( "the ALT allele(s) for the record at position %s:%d do not match what is observed in the per-sample genotypes", getChr(), getStart())); }
/** * Returns a context identical to this with the REF and ALT alleles reverse complemented. * * @param vc variant context * @return new vc */ public static VariantContext reverseComplement(VariantContext vc) { // create a mapping from original allele to reverse complemented allele HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size()); for (Allele originalAllele : vc.getAlleles()) { Allele newAllele; if (originalAllele.isNoCall() || originalAllele.isNull()) newAllele = originalAllele; else newAllele = Allele.create( BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference()); alleleMap.put(originalAllele, newAllele); } // create new Genotype objects GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples()); for (final Genotype genotype : vc.getGenotypes()) { List<Allele> newAlleles = new ArrayList<Allele>(); for (Allele allele : genotype.getAlleles()) { Allele newAllele = alleleMap.get(allele); if (newAllele == null) newAllele = Allele.NO_CALL; newAlleles.add(newAllele); } newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles)); } return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make(); }
/**
 * Counts, over all genotypes, the alleles that are not no-calls (i.e. the number of chromosomes
 * carrying any allele).
 *
 * @return chromosome count
 */
public int getCalledChrCount() {
  int called = 0;
  for (final Genotype genotype : getGenotypes()) {
    for (final Allele allele : genotype.getAlleles()) {
      if (!allele.isNoCall()) {
        called++;
      }
    }
  }
  return called;
}
/**
 * Counts the chromosomes carrying the given allele by summing, over all genotypes, the size of
 * {@code getAlleles(a)}.
 *
 * @param a allele
 * @return chromosome count
 */
public int getCalledChrCount(Allele a) {
  int total = 0;
  for (final Genotype genotype : getGenotypes()) {
    final int copiesInGenotype = genotype.getAlleles(a).size();
    total += copiesInGenotype;
  }
  return total;
}
static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) { for (final Genotype gt1 : vc1.getGenotypes()) { Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); List<Allele> site1Alleles = gt1.getAlleles(); List<Allele> site2Alleles = gt2.getAlleles(); Iterator<Allele> all2It = site2Alleles.iterator(); for (Allele all1 : site1Alleles) { Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable() if (all1.isNonReference() && all2.isNonReference()) // corresponding alleles are alternate return true; } } return false; }
/**
 * Builds a copy of the genotype whose attribute map no longer contains the
 * {@code VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY} and
 * {@code VCFConstants.GENOTYPE_LIKELIHOODS_KEY} entries.
 *
 * <p>The filters are carried over only when {@code filtersWereApplied()} is true; otherwise
 * {@code null} is passed to the constructor.
 *
 * @param g genotype to copy; its own attribute map is not modified
 * @return the new genotype
 */
public static Genotype removePLs(Genotype g) {
  // work on a private copy so the source genotype's attributes stay untouched
  final Map<String, Object> withoutLikelihoods = new HashMap<String, Object>(g.getAttributes());
  withoutLikelihoods.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
  withoutLikelihoods.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);

  return new Genotype(
      g.getSampleName(),
      g.getAlleles(),
      g.getLog10PError(),
      g.filtersWereApplied() ? g.getFilters() : null,
      withoutLikelihoods,
      g.isPhased());
}
/**
 * Verifies that every called allele of every available genotype is one of this context's
 * alleles.
 *
 * @throws IllegalStateException if the genotypes are null, or a called genotype allele is not
 *     known to this context
 */
private void validateGenotypes() {
  if (this.genotypes == null) {
    throw new IllegalStateException("Genotypes is null");
  }

  for (final Genotype genotype : this.genotypes) {
    if (!genotype.isAvailable()) {
      continue;
    }
    for (final Allele allele : genotype.getAlleles()) {
      final boolean missingFromContext = !hasAllele(allele);
      if (missingFromContext && allele.isCalled()) {
        throw new IllegalStateException(
            "Allele in genotype " + allele + " not in the variant context " + alleles);
      }
    }
  }
}
/**
 * Helper routine for subcontext: gathers the called alleles of the given genotypes and, when
 * none of the genotype alleles was a reference allele, adds this context's reference.
 *
 * @param genotypes genotypes
 * @return allele set
 */
private final Set<Allele> allelesOfGenotypes(Collection<Genotype> genotypes) {
  final Set<Allele> collected = new HashSet<Allele>();
  boolean sawReference = false;

  for (final Genotype genotype : genotypes) {
    for (final Allele allele : genotype.getAlleles()) {
      if (!sawReference && allele.isReference()) {
        sawReference = true;
      }
      if (allele.isCalled()) {
        collected.add(allele);
      }
    }
  }

  if (!sawReference) {
    collected.add(getReference());
  }
  return collected;
}
static boolean doubleAllelesSegregatePerfectlyAmongSamples( VariantContext vc1, VariantContext vc2) { // Check that Alleles at vc1 and at vc2 always segregate together in all samples (including // reference): Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>(); Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>(); // Note the segregation of the alleles for the reference genome: allele1ToAllele2.put(vc1.getReference(), vc2.getReference()); allele2ToAllele1.put(vc2.getReference(), vc1.getReference()); // Note the segregation of the alleles for each sample (and check that it is consistent with the // reference and all previous samples). for (final Genotype gt1 : vc1.getGenotypes()) { Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); List<Allele> site1Alleles = gt1.getAlleles(); List<Allele> site2Alleles = gt2.getAlleles(); Iterator<Allele> all2It = site2Alleles.iterator(); for (Allele all1 : site1Alleles) { Allele all2 = all2It.next(); Allele all1To2 = allele1ToAllele2.get(all1); if (all1To2 == null) allele1ToAllele2.put(all1, all2); else if (!all1To2.equals(all2)) // all1 segregates with two different alleles at site 2 return false; Allele all2To1 = allele2ToAllele1.get(all2); if (all2To1 == null) allele2ToAllele1.put(all2, all1); else if (!all2To1.equals(all1)) // all2 segregates with two different alleles at site 1 return false; } } return true; }
public static void assertEquals(final Genotype actual, final Genotype expected) { Assert.assertEquals(actual.getSampleName(), expected.getSampleName(), "Genotype names"); Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "Genotype alleles"); Assert.assertEquals( actual.getGenotypeString(), expected.getGenotypeString(), "Genotype string"); Assert.assertEquals(actual.getType(), expected.getType(), "Genotype type"); // filters are the same Assert.assertEquals(actual.getFilters(), expected.getFilters(), "Genotype fields"); Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "Genotype isFiltered"); // inline attributes Assert.assertEquals(actual.getDP(), expected.getDP(), "Genotype dp"); Assert.assertTrue(Arrays.equals(actual.getAD(), expected.getAD())); Assert.assertEquals(actual.getGQ(), expected.getGQ(), "Genotype gq"); Assert.assertEquals(actual.hasPL(), expected.hasPL(), "Genotype hasPL"); Assert.assertEquals(actual.hasAD(), expected.hasAD(), "Genotype hasAD"); Assert.assertEquals(actual.hasGQ(), expected.hasGQ(), "Genotype hasGQ"); Assert.assertEquals(actual.hasDP(), expected.hasDP(), "Genotype hasDP"); Assert.assertEquals( actual.hasLikelihoods(), expected.hasLikelihoods(), "Genotype haslikelihoods"); Assert.assertEquals( actual.getLikelihoodsString(), expected.getLikelihoodsString(), "Genotype getlikelihoodsString"); Assert.assertEquals( actual.getLikelihoods(), expected.getLikelihoods(), "Genotype getLikelihoods"); Assert.assertTrue(Arrays.equals(actual.getPL(), expected.getPL())); Assert.assertEquals( actual.getPhredScaledQual(), expected.getPhredScaledQual(), "Genotype phredScaledQual"); assertAttributesEquals(actual.getExtendedAttributes(), expected.getExtendedAttributes()); Assert.assertEquals(actual.isPhased(), expected.isPhased(), "Genotype isPhased"); Assert.assertEquals(actual.getPloidy(), expected.getPloidy(), "Genotype getPloidy"); }
public static VariantContextBuilder pruneVariantContext( final VariantContextBuilder builder, Collection<String> keysToPreserve) { final VariantContext vc = builder.make(); if (keysToPreserve == null) keysToPreserve = Collections.emptyList(); // VC info final Map<String, Object> attributes = subsetAttributes(vc.commonInfo, keysToPreserve); // Genotypes final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples()); for (final Genotype g : vc.getGenotypes()) { Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve); genotypes.add( new Genotype( g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.getFilters(), genotypeAttributes, g.isPhased())); } return builder.genotypes(genotypes).attributes(attributes); }
public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) { // see if we need to trim common reference base from all alleles boolean trimVC; // We need to trim common reference base from all alleles in all genotypes if a ref base is // common to all alleles Allele refAllele = inputVC.getReference(); if (!inputVC.isVariant()) trimVC = false; else if (refAllele.isNull()) trimVC = false; else { trimVC = (AbstractVCFCodec.computeForwardClipping( new ArrayList<Allele>(inputVC.getAlternateAlleles()), inputVC.getReference().getDisplayString()) > 0); } // nothing to do if we don't need to trim bases if (trimVC) { List<Allele> alleles = new ArrayList<Allele>(); GenotypesContext genotypes = GenotypesContext.create(); // set the reference base for indels in the attributes Map<String, Object> attributes = new TreeMap<String, Object>(inputVC.getAttributes()); Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>(); for (Allele a : inputVC.getAlleles()) { if (a.isSymbolic()) { alleles.add(a); originalToTrimmedAlleleMap.put(a, a); } else { // get bases for current allele and create a new one with trimmed bases byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length()); Allele trimmedAllele = Allele.create(newBases, a.isReference()); alleles.add(trimmedAllele); originalToTrimmedAlleleMap.put(a, trimmedAllele); } } // detect case where we're trimming bases but resulting vc doesn't have any null allele. 
In // that case, we keep original representation // example: mixed records such as {TA*,TGA,TG} boolean hasNullAlleles = false; for (Allele a : originalToTrimmedAlleleMap.values()) { if (a.isNull()) hasNullAlleles = true; if (a.isReference()) refAllele = a; } if (!hasNullAlleles) return inputVC; // now we can recreate new genotypes with trimmed alleles for (final Genotype genotype : inputVC.getGenotypes()) { List<Allele> originalAlleles = genotype.getAlleles(); List<Allele> trimmedAlleles = new ArrayList<Allele>(); for (Allele a : originalAlleles) { if (a.isCalled()) trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); else trimmedAlleles.add(Allele.NO_CALL); } genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles)); } final VariantContextBuilder builder = new VariantContextBuilder(inputVC); return builder .alleles(alleles) .genotypes(genotypes) .attributes(attributes) .referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])) .make(); } return inputVC; }
public void renderGenotypeBandSNP( Variant variant, RenderContext context, Rectangle bandRectangle, int pX0, int dX, String sampleName, VariantTrack.ColorMode coloring, boolean hideFiltered) { int pY = (int) bandRectangle.getY(); int dY = (int) bandRectangle.getHeight(); int tOffset = 6; int bOffset = 8; Graphics2D g = (Graphics2D) context.getGraphics().create(); if (dX >= 10) { if (dY > 24) { Font f = FontManager.getFont(Font.BOLD, Math.min(dX, 12)); g.setFont(f); } else if (dY > 18) { Font f = FontManager.getFont(Font.BOLD, Math.min(dX, 8)); tOffset = 4; bOffset = 5; g.setFont(f); } } boolean isFiltered = variant.isFiltered() && hideFiltered; Genotype genotype = variant.getGenotype(sampleName); if (genotype == null) { log.error("Now what?"); } else { Color b1Color = Color.gray; Color b2Color = Color.gray; char b1 = ' '; char b2 = ' '; // Assign proper coloring switch (coloring) { case GENOTYPE: b1Color = getGenotypeColor(genotype, isFiltered); b2Color = b1Color; break; case ALLELE: final List<Allele> alleleList = genotype.getAlleles(); if (alleleList.size() > 0) { b1 = getFirstBase(alleleList.get(0)); b1Color = nucleotideColors.get(b1); } if (alleleList.size() > 1) { b2 = getFirstBase(alleleList.get(1)); b2Color = nucleotideColors.get(b2); } break; case METHYLATION_RATE: final double goodBaseCount = genotype.getAttributeAsDouble("GB"); b1Color = colorNoCall; b2Color = b1Color; final double value = genotype.getAttributeAsDouble("MR"); if (!Double.isNaN(goodBaseCount) && !Double.isNaN(value)) { if (goodBaseCount < VariantTrack.METHYLATION_MIN_BASE_COUNT || Double.isNaN(value)) { b1Color = colorNoCall; b2Color = b1Color; } else { float mr = (float) value; mr /= 100f; b1Color = convertMethylationRateToColor(mr); b2Color = b1Color; } } else { log.error( "GB and MR fields must be defined for all records in a VCF methylation file."); } break; default: b1Color = colorNoCall; b2Color = b1Color; } int y0 = track.getDisplayMode() == Track.DisplayMode.EXPANDED ? 
pY + 1 : pY; int h = Math.max(1, track.getDisplayMode() == Track.DisplayMode.EXPANDED ? dY - 2 : dY); if (coloring == VariantTrack.ColorMode.GENOTYPE) { g.setColor(b1Color); g.fillRect(pX0, y0, dX, h); } else { // Color by allele g.setColor(b1Color); g.fillRect(pX0, y0, (dX / 2), h); g.setColor(b2Color); g.fillRect(pX0 + (dX / 2), y0, (dX / 2), h); } if ((dX >= 10) && (dY >= 18)) { if (b1Color == Color.blue) { g.setColor(Color.white); } else { g.setColor(Color.black); } drawCenteredText(g, new char[] {b1}, pX0, pY - tOffset, dX, dY); drawCenteredText(g, new char[] {b2}, pX0, pY + (dY / 2) - bOffset, dX, dY); } } g.dispose(); }
static VariantContext reallyMergeIntoMNP( VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) { int startInter = vc1.getEnd() + 1; int endInter = vc2.getStart() - 1; byte[] intermediateBases = null; if (startInter <= endInter) { intermediateBases = referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases(); StringUtil.toUpperCase(intermediateBases); } MergedAllelesData mergeData = new MergedAllelesData( intermediateBases, vc1, vc2); // ensures that the reference allele is added GenotypesContext mergedGenotypes = GenotypesContext.create(); for (final Genotype gt1 : vc1.getGenotypes()) { Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); List<Allele> site1Alleles = gt1.getAlleles(); List<Allele> site2Alleles = gt2.getAlleles(); List<Allele> mergedAllelesForSample = new LinkedList<Allele>(); /* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order as in the input VC records, we preserve phase information (if any) relative to whatever precedes vc1: */ Iterator<Allele> all2It = site2Alleles.iterator(); for (Allele all1 : site1Alleles) { Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable() Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2); mergedAllelesForSample.add(mergedAllele); } double mergedGQ = Math.max(gt1.getLog10PError(), gt2.getLog10PError()); Set<String> mergedGtFilters = new HashSet< String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered Map<String, Object> mergedGtAttribs = new HashMap<String, Object>(); PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2); if (phaseQual.PQ != null) mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ); Genotype mergedGt = new Genotype( gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased); mergedGenotypes.add(mergedGt); } String mergedName = mergeVariantContextNames(vc1.getSource(), vc2.getSource()); double 
mergedLog10PError = Math.min(vc1.getLog10PError(), vc2.getLog10PError()); Set<String> mergedFilters = new HashSet< String>(); // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered Map<String, Object> mergedAttribs = mergeVariantContextAttributes(vc1, vc2); // ids List<String> mergedIDs = new ArrayList<String>(); if (vc1.hasID()) mergedIDs.add(vc1.getID()); if (vc2.hasID()) mergedIDs.add(vc2.getID()); String mergedID = mergedIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs); VariantContextBuilder mergedBuilder = new VariantContextBuilder( mergedName, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles()) .id(mergedID) .genotypes(mergedGenotypes) .log10PError(mergedLog10PError) .filters(mergedFilters) .attributes(mergedAttribs); VariantContextUtils.calculateChromosomeCounts(mergedBuilder, true); return mergedBuilder.make(); }
public static VariantContext createVariantContextWithPaddedAlleles( VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) { // see if we need to pad common reference base from all alleles boolean padVC; // We need to pad a VC with a common base if the length of the reference allele is less than the // length of the VariantContext. // This happens because the position of e.g. an indel is always one before the actual event (as // per VCF convention). long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1; if (inputVC.hasSymbolicAlleles()) padVC = true; else if (inputVC.getReference().length() == locLength) padVC = false; else if (inputVC.getReference().length() == locLength - 1) padVC = true; else throw new IllegalArgumentException( "Badly formed variant context at location " + String.valueOf(inputVC.getStart()) + " in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size"); // nothing to do if we don't need to pad bases if (padVC) { if (!inputVC.hasReferenceBaseForIndel()) throw new ReviewedStingException( "Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available."); Byte refByte = inputVC.getReferenceBaseForIndel(); List<Allele> alleles = new ArrayList<Allele>(); for (Allele a : inputVC.getAlleles()) { // get bases for current allele and create a new one with trimmed bases if (a.isSymbolic()) { alleles.add(a); } else { String newBases; if (refBaseShouldBeAppliedToEndOfAlleles) newBases = a.getBaseString() + new String(new byte[] {refByte}); else newBases = new String(new byte[] {refByte}) + a.getBaseString(); alleles.add(Allele.create(newBases, a.isReference())); } } // now we can recreate new genotypes with trimmed alleles GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples()); for (final Genotype g : inputVC.getGenotypes()) { List<Allele> inAlleles = g.getAlleles(); List<Allele> newGenotypeAlleles = 
new ArrayList<Allele>(g.getAlleles().size()); for (Allele a : inAlleles) { if (a.isCalled()) { if (a.isSymbolic()) { newGenotypeAlleles.add(a); } else { String newBases; if (refBaseShouldBeAppliedToEndOfAlleles) newBases = a.getBaseString() + new String(new byte[] {refByte}); else newBases = new String(new byte[] {refByte}) + a.getBaseString(); newGenotypeAlleles.add(Allele.create(newBases, a.isReference())); } } else { // add no-call allele newGenotypeAlleles.add(Allele.NO_CALL); } } genotypes.add( new Genotype( g.getSampleName(), newGenotypeAlleles, g.getLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased())); } return new VariantContextBuilder(inputVC).alleles(alleles).genotypes(genotypes).make(); } else return inputVC; }