/**
 * Translates the {@code "GV"} attribute of a variant into a {@link GVstatus}.
 *
 * @param vc the variant whose {@code "GV"} attribute is inspected
 * @return {@code GVstatus.NONE} when the attribute is absent, {@code GVstatus.T} when its value
 *     equals {@code "T"}, and {@code GVstatus.F} for any other value
 */
private GVstatus getGVstatus(final VariantContext vc) {
  if (!vc.hasAttribute("GV")) {
    return GVstatus.NONE;
  }
  final boolean markedTrue = vc.getAttribute("GV").equals("T");
  return markedTrue ? GVstatus.T : GVstatus.F;
}
@Override public void accumulate(final VariantContext ctx) { logger.record(ctx.getContig(), ctx.getStart()); final String variantChrom = ctx.getContig(); final int variantPos = ctx.getStart(); // Skip anything a little too funky if (ctx.isFiltered()) return; if (!ctx.isVariant()) return; if (SKIP_CHROMS.contains(variantChrom)) return; for (final MendelianViolationMetrics trio : trios) { final Genotype momGt = ctx.getGenotype(trio.MOTHER); final Genotype dadGt = ctx.getGenotype(trio.FATHER); final Genotype kidGt = ctx.getGenotype(trio.OFFSPRING); // if any genotype: // - has a non-snp allele; or // - lacks a reference allele // // then ignore this trio if (CollectionUtil.makeList(momGt, dadGt, kidGt) .stream() .anyMatch( gt -> gt.isHetNonRef() || Stream.concat(Stream.of(ctx.getReference()), gt.getAlleles().stream()) .anyMatch(a -> a.length() != 1 || a.isSymbolic()))) { continue; } // if between the trio there are more than 2 alleles including the reference, continue if (Stream.concat( Collections.singleton(ctx.getReference()).stream(), CollectionUtil.makeList(momGt, dadGt, kidGt) .stream() .flatMap(gt -> gt.getAlleles().stream())) .collect(Collectors.toSet()) .size() > 2) continue; // Test to make sure: // 1) That the site is in fact variant in the trio // 2) that the offspring doesn't have a really wacky het allele balance if (!isVariant(momGt, dadGt, kidGt)) continue; if (kidGt.isHet()) { final int[] ad = kidGt.getAD(); if (ad == null) continue; final List<Integer> adOfAlleles = kidGt .getAlleles() .stream() .map(a -> ad[ctx.getAlleleIndex(a)]) .collect(Collectors.toList()); final double minAlleleFraction = Math.min(adOfAlleles.get(0), adOfAlleles.get(1)) / (double) (adOfAlleles.get(0) + adOfAlleles.get(1)); if (minAlleleFraction < MIN_HET_FRACTION) continue; } /////////////////////////////////////////////////////////////// // Determine whether the offspring should be haploid at this // locus and which is the parental donor of the haploid genotype 
/////////////////////////////////////////////////////////////// boolean haploid = false; Genotype haploidParentalGenotype = null; if (FEMALE_CHROMS.contains(variantChrom) && trio.OFFSPRING_SEX != Sex.Unknown) { if (trio.OFFSPRING_SEX == Sex.Female) { // famale haploid = false; } else if (isInPseudoAutosomalRegion(variantChrom, variantPos)) { // male but in PAR on X, so diploid haploid = false; } else { // male, out of PAR on X, haploid haploid = true; haploidParentalGenotype = momGt; } } // the PAR on the male chromosome should be masked so that reads // align to the female chromosomes instead, so there's no point // of worrying about that here. if (MALE_CHROMS.contains(variantChrom)) { if (trio.OFFSPRING_SEX == Sex.Male) { haploid = true; haploidParentalGenotype = dadGt; } else { continue; } } // We only want to look at sites where we have high enough confidence that the genotypes we // are looking at are // interesting. We want to ensure that parents are always GQ>=MIN_GQ, and that the kid is // either GQ>=MIN_GQ or in the // case where kid is het that the phred-scaled-likelihood of being reference is >=MIN_GQ. 
if (haploid && (haploidParentalGenotype.isNoCall() || haploidParentalGenotype.getGQ() < MIN_GQ)) continue; if (!haploid && (momGt.isNoCall() || momGt.getGQ() < MIN_GQ || dadGt.isNoCall() || dadGt.getGQ() < MIN_GQ)) continue; if (kidGt.isNoCall()) continue; if (momGt.isHomRef() && dadGt.isHomRef() && !kidGt.isHomRef()) { if (kidGt.getPL()[0] < MIN_GQ) continue; } else if (kidGt.getGQ() < MIN_GQ) continue; // Also filter on the DP for each of the samples - it's possible to miss hets when DP is too // low if (haploid && (kidGt.getDP() < MIN_DP || haploidParentalGenotype.getDP() < MIN_DP)) continue; if (!haploid && (kidGt.getDP() < MIN_DP || momGt.getDP() < MIN_DP || dadGt.getDP() < MIN_DP)) continue; trio.NUM_VARIANT_SITES++; /////////////////////////////////////////////////////////////// // First test for haploid violations /////////////////////////////////////////////////////////////// MendelianViolation type = null; if (haploid) { if (kidGt.isHet()) continue; // Should not see heterozygous calls at haploid regions if (!haploidParentalGenotype.getAlleles().contains(kidGt.getAllele(0))) { if (kidGt.isHomRef()) { type = MendelianViolation.Haploid_Other; trio.NUM_HAPLOID_OTHER++; } else { type = MendelianViolation.Haploid_Denovo; trio.NUM_HAPLOID_DENOVO++; } } } /////////////////////////////////////////////////////////////// // Then test for diploid mendelian violations /////////////////////////////////////////////////////////////// else if (isMendelianViolation(momGt, dadGt, kidGt)) { if (momGt.isHomRef() && dadGt.isHomRef() && !kidGt.isHomRef()) { trio.NUM_DIPLOID_DENOVO++; type = MendelianViolation.Diploid_Denovo; } else if (momGt.isHomVar() && dadGt.isHomVar() && kidGt.isHet()) { trio.NUM_HOMVAR_HOMVAR_HET++; type = MendelianViolation.HomVar_HomVar_Het; } else if (kidGt.isHom() && ((momGt.isHomRef() && dadGt.isHomVar()) || (momGt.isHomVar() && dadGt.isHomRef()))) { trio.NUM_HOMREF_HOMVAR_HOM++; type = MendelianViolation.HomRef_HomVar_Hom; } else if (kidGt.isHom() 
&& ((momGt.isHom() && dadGt.isHet()) || (momGt.isHet() && dadGt.isHom()))) { trio.NUM_HOM_HET_HOM++; type = MendelianViolation.Hom_Het_Hom; } else { trio.NUM_OTHER++; type = MendelianViolation.Other; } } // Output a record into the family's violation VCF if (type != null) { // Create a new Context subsetted to the three samples final VariantContextBuilder builder = new VariantContextBuilder(ctx); builder.genotypes( ctx.getGenotypes() .subsetToSamples(CollectionUtil.makeSet(trio.MOTHER, trio.FATHER, trio.OFFSPRING))); builder.attribute(MENDELIAN_VIOLATION_KEY, type.name()); // Copy over some useful attributes from the full context if (ctx.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) builder.attribute(ORIGINAL_AC, ctx.getAttribute(VCFConstants.ALLELE_COUNT_KEY)); if (ctx.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) builder.attribute(ORIGINAL_AF, ctx.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)); if (ctx.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY)) builder.attribute(ORIGINAL_AN, ctx.getAttribute(VCFConstants.ALLELE_NUMBER_KEY)); // Write out the variant record familyToViolations.get(trio.FAMILY_ID).add(builder.make()); } } }
public CountedData map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { final CountedData counter = new CountedData(); // For some reason RodWalkers get map calls with null trackers if (tracker == null) return counter; VariantContext vcComp = tracker.getFirstValue(alleles); if (vcComp == null) return counter; // todo - not sure I want this, may be misleading to filter extended indel events. if (isInsideExtendedIndel(vcComp, ref)) return counter; // Do not operate on variants that are not covered to the optional minimum depth if (!context.hasReads() || (minDepth > 0 && context.getBasePileup().getBases().length < minDepth)) { counter.nUncovered = 1L; final GVstatus status = getGVstatus(vcComp); if (status == GVstatus.T) counter.nAltNotCalled = 1L; else if (status == GVstatus.F) counter.nRefNotCalled = 1L; else counter.nNoStatusNotCalled = 1L; return counter; } VariantCallContext call; if (vcComp.isSNP()) { call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0); } else if (vcComp.isIndel()) { call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0); } else if (bamIsTruth) { // assume it's a SNP if no variation is present; this is necessary so that we can test // supposed monomorphic sites against the truth bam call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0); } else { logger.info( "Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles()); return counter; } boolean writeVariant = true; if (bamIsTruth) { if (call.confidentlyCalled) { // If truth is a confident REF call if (call.isVariant()) { if (vcComp.isVariant()) counter.nAltCalledAlt = 1L; else { counter.nAltCalledRef = 1L; if (printInterestingSites) System.out.println("Truth=ALT Call=REF at " + call.getChr() + ":" + call.getStart()); } } // If truth is a confident ALT call else { if (vcComp.isVariant()) { counter.nRefCalledAlt = 1L; if (printInterestingSites) 
System.out.println("Truth=REF Call=ALT at " + call.getChr() + ":" + call.getStart()); } else counter.nRefCalledRef = 1L; } } else { counter.nNotConfidentCalls = 1L; if (printInterestingSites) System.out.println("Truth is not confident at " + call.getChr() + ":" + call.getStart()); writeVariant = false; } } else { // if (!vcComp.hasExtendedAttribute("GV")) // throw new UserException.BadInput("Variant has no GV annotation in the INFO // field. " + vcComp.getChr() + ":" + vcComp.getStart()); final GVstatus status = getGVstatus(vcComp); if (call.isCalledAlt(callConf)) { if (status == GVstatus.T) counter.nAltCalledAlt = 1L; else if (status == GVstatus.F) { counter.nRefCalledAlt = 1L; if (printInterestingSites) System.out.println("Truth=REF Call=ALT at " + call.getChr() + ":" + call.getStart()); } else counter.nNoStatusCalledAlt = 1L; } else if (call.isCalledRef(callConf)) { if (status == GVstatus.T) { counter.nAltCalledRef = 1L; if (printInterestingSites) System.out.println("Truth=ALT Call=REF at " + call.getChr() + ":" + call.getStart()); } else if (status == GVstatus.F) counter.nRefCalledRef = 1L; else counter.nNoStatusCalledRef = 1L; } else { counter.nNotConfidentCalls = 1L; if (status == GVstatus.T) counter.nAltNotCalled = 1L; else if (status == GVstatus.F) counter.nRefNotCalled = 1L; else counter.nNoStatusNotCalled = 1L; if (printInterestingSites) System.out.println("Truth is not confident at " + call.getChr() + ":" + call.getStart()); writeVariant = false; } } if (vcfWriter != null && writeVariant) { if (!vcComp.hasAttribute(GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY)) { vcfWriter.add( new VariantContextBuilder(vcComp) .attribute( GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY, call.isCalledAlt(callConf) ? "ALT" : "REF") .make()); } else vcfWriter.add(vcComp); } return counter; }