コード例 #1
0
 /**
  * Translates the "GV" attribute of a variant into a {@link GVstatus}.
  *
  * @param vc the variant whose "GV" attribute is inspected
  * @return {@code GVstatus.NONE} when the variant has no "GV" attribute, {@code GVstatus.T} when
  *     the attribute value equals "T", and {@code GVstatus.F} for any other value
  */
 private GVstatus getGVstatus(final VariantContext vc) {
   // No annotation at all: the site carries no validation status.
   if (!vc.hasAttribute("GV")) {
     return GVstatus.NONE;
   }

   // Anything other than an exact "T" is treated as false.
   final boolean markedTrue = vc.getAttribute("GV").equals("T");
   return markedTrue ? GVstatus.T : GVstatus.F;
 }
コード例 #2
0
  /**
   * Evaluates one variant record against every trio in {@code trios}, updating the trio's counters
   * and recording any Mendelian violation that is found.
   *
   * <p>The record is ignored entirely when it is filtered, not variant, or located on a contig in
   * {@code SKIP_CHROMS}. Otherwise each trio is examined independently: the trio is skipped for
   * this site unless its genotypes pass the allele-shape, variant-in-trio, het allele-balance,
   * GQ and DP checks below. Sites that pass increment {@code NUM_VARIANT_SITES}; they are then
   * classified as a haploid violation, a diploid violation, or no violation. For a violation, a
   * copy of the record subsetted to the trio's three samples and annotated with the violation
   * type is added to the collection stored in {@code familyToViolations} under the trio's
   * {@code FAMILY_ID}.
   *
   * @param ctx the variant record to evaluate
   */
  @Override
  public void accumulate(final VariantContext ctx) {
    logger.record(ctx.getContig(), ctx.getStart());

    final String variantChrom = ctx.getContig();
    final int variantPos = ctx.getStart();

    // Skip anything a little too funky: filtered records, non-variant records, and records on
    // contigs listed in SKIP_CHROMS
    if (ctx.isFiltered()) return;
    if (!ctx.isVariant()) return;
    if (SKIP_CHROMS.contains(variantChrom)) return;

    for (final MendelianViolationMetrics trio : trios) {
      final Genotype momGt = ctx.getGenotype(trio.MOTHER);
      final Genotype dadGt = ctx.getGenotype(trio.FATHER);
      final Genotype kidGt = ctx.getGenotype(trio.OFFSPRING);

      // if any genotype:
      // - is het-non-ref (i.e. lacks a reference allele while being het); or
      // - has an allele (or the site has a reference allele) whose length is not 1 or that is
      //   symbolic, i.e. a non-snp allele
      //
      // then ignore this trio
      if (CollectionUtil.makeList(momGt, dadGt, kidGt)
          .stream()
          .anyMatch(
              gt ->
                  gt.isHetNonRef()
                      || Stream.concat(Stream.of(ctx.getReference()), gt.getAlleles().stream())
                          .anyMatch(a -> a.length() != 1 || a.isSymbolic()))) {
        continue;
      }

      // if between the trio there are more than 2 distinct alleles including the reference,
      // continue
      if (Stream.concat(
                  Collections.singleton(ctx.getReference()).stream(),
                  CollectionUtil.makeList(momGt, dadGt, kidGt)
                      .stream()
                      .flatMap(gt -> gt.getAlleles().stream()))
              .collect(Collectors.toSet())
              .size()
          > 2) continue;

      // Test to make sure:
      //   1) That the site is in fact variant in the trio
      //   2) that the offspring doesn't have a really wacky het allele balance
      if (!isVariant(momGt, dadGt, kidGt)) continue;
      if (kidGt.isHet()) {
        final int[] ad = kidGt.getAD();
        // Without allele depths the balance cannot be assessed, so the trio is skipped
        if (ad == null) continue;

        // Look up the depth of each of the kid's called alleles by its index in the record
        final List<Integer> adOfAlleles =
            kidGt
                .getAlleles()
                .stream()
                .map(a -> ad[ctx.getAlleleIndex(a)])
                .collect(Collectors.toList());
        // Fraction of the two alleles' combined depth that belongs to the less-covered allele.
        // NOTE(review): if both depths are 0 this evaluates to NaN, the comparison below is then
        // false and the trio is NOT skipped here - confirm that is intended.
        final double minAlleleFraction =
            Math.min(adOfAlleles.get(0), adOfAlleles.get(1))
                / (double) (adOfAlleles.get(0) + adOfAlleles.get(1));
        if (minAlleleFraction < MIN_HET_FRACTION) continue;
      }

      ///////////////////////////////////////////////////////////////
      // Determine whether the offspring should be haploid at this
      // locus and which is the parental donor of the haploid genotype
      ///////////////////////////////////////////////////////////////
      boolean haploid = false;
      Genotype haploidParentalGenotype = null;

      // Offspring of unknown sex on FEMALE_CHROMS fall through and are treated as diploid
      if (FEMALE_CHROMS.contains(variantChrom) && trio.OFFSPRING_SEX != Sex.Unknown) {
        if (trio.OFFSPRING_SEX == Sex.Female) {
          // female
          haploid = false;
        } else if (isInPseudoAutosomalRegion(variantChrom, variantPos)) {
          // male but in PAR on X, so diploid
          haploid = false;
        } else {
          // male, out of PAR on X, haploid
          haploid = true;
          haploidParentalGenotype = momGt;
        }
      }

      // the PAR on the male chromosome should be masked so that reads
      // align to the female chromosomes instead, so there's no point
      // of worrying about that here.

      // On MALE_CHROMS only male offspring are evaluated (haploid, donor is the father); for any
      // other offspring sex the trio is skipped at this site
      if (MALE_CHROMS.contains(variantChrom)) {
        if (trio.OFFSPRING_SEX == Sex.Male) {
          haploid = true;
          haploidParentalGenotype = dadGt;
        } else {
          continue;
        }
      }

      // We only want to look at sites where we have high enough confidence that the genotypes we
      // are looking at are interesting. The relevant parent(s) - the single donor parent when
      // haploid, otherwise both - must be called with GQ>=MIN_GQ. The kid must be called and have
      // GQ>=MIN_GQ, except when both parents are hom-ref and the kid is not: in that case the
      // phred-scaled likelihood of the kid being reference (PL[0]) must be >=MIN_GQ instead.
      if (haploid
          && (haploidParentalGenotype.isNoCall() || haploidParentalGenotype.getGQ() < MIN_GQ))
        continue;
      if (!haploid
          && (momGt.isNoCall()
              || momGt.getGQ() < MIN_GQ
              || dadGt.isNoCall()
              || dadGt.getGQ() < MIN_GQ)) continue;
      if (kidGt.isNoCall()) continue;
      if (momGt.isHomRef() && dadGt.isHomRef() && !kidGt.isHomRef()) {
        // NOTE(review): getPL() is indexed without a null check - confirm PLs are always present
        // for genotypes reaching this point
        if (kidGt.getPL()[0] < MIN_GQ) continue;
      } else if (kidGt.getGQ() < MIN_GQ) continue;

      // Also filter on the DP for each of the samples - it's possible to miss hets when DP is too
      // low
      if (haploid && (kidGt.getDP() < MIN_DP || haploidParentalGenotype.getDP() < MIN_DP)) continue;
      if (!haploid && (kidGt.getDP() < MIN_DP || momGt.getDP() < MIN_DP || dadGt.getDP() < MIN_DP))
        continue;

      // The site passed every filter for this trio. Note this is counted before the haploid-het
      // skip below, so such sites are still included in NUM_VARIANT_SITES.
      trio.NUM_VARIANT_SITES++;

      ///////////////////////////////////////////////////////////////
      // First test for haploid violations
      ///////////////////////////////////////////////////////////////
      // Stays null when the site is not a violation for this trio
      MendelianViolation type = null;
      if (haploid) {
        if (kidGt.isHet()) continue; // Should not see heterozygous calls at haploid regions

        // Violation if the kid's allele is not among the donor parent's alleles
        if (!haploidParentalGenotype.getAlleles().contains(kidGt.getAllele(0))) {
          if (kidGt.isHomRef()) {
            type = MendelianViolation.Haploid_Other;
            trio.NUM_HAPLOID_OTHER++;
          } else {
            type = MendelianViolation.Haploid_Denovo;
            trio.NUM_HAPLOID_DENOVO++;
          }
        }
      }
      ///////////////////////////////////////////////////////////////
      // Then test for diploid mendelian violations
      ///////////////////////////////////////////////////////////////
      else if (isMendelianViolation(momGt, dadGt, kidGt)) {
        // Classify the violation; the first matching category wins, anything else is "Other"
        if (momGt.isHomRef() && dadGt.isHomRef() && !kidGt.isHomRef()) {
          trio.NUM_DIPLOID_DENOVO++;
          type = MendelianViolation.Diploid_Denovo;
        } else if (momGt.isHomVar() && dadGt.isHomVar() && kidGt.isHet()) {
          trio.NUM_HOMVAR_HOMVAR_HET++;
          type = MendelianViolation.HomVar_HomVar_Het;
        } else if (kidGt.isHom()
            && ((momGt.isHomRef() && dadGt.isHomVar()) || (momGt.isHomVar() && dadGt.isHomRef()))) {
          trio.NUM_HOMREF_HOMVAR_HOM++;
          type = MendelianViolation.HomRef_HomVar_Hom;
        } else if (kidGt.isHom()
            && ((momGt.isHom() && dadGt.isHet()) || (momGt.isHet() && dadGt.isHom()))) {
          trio.NUM_HOM_HET_HOM++;
          type = MendelianViolation.Hom_Het_Hom;
        } else {
          trio.NUM_OTHER++;
          type = MendelianViolation.Other;
        }
      }

      // Output a record into the family's violation VCF
      if (type != null) {
        // Create a new Context subsetted to the three samples
        final VariantContextBuilder builder = new VariantContextBuilder(ctx);
        builder.genotypes(
            ctx.getGenotypes()
                .subsetToSamples(CollectionUtil.makeSet(trio.MOTHER, trio.FATHER, trio.OFFSPRING)));
        builder.attribute(MENDELIAN_VIOLATION_KEY, type.name());

        // Copy over some useful attributes (AC, AF, AN) from the full context, when present
        if (ctx.hasAttribute(VCFConstants.ALLELE_COUNT_KEY))
          builder.attribute(ORIGINAL_AC, ctx.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
        if (ctx.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY))
          builder.attribute(ORIGINAL_AF, ctx.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY));
        if (ctx.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY))
          builder.attribute(ORIGINAL_AN, ctx.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));

        // Write out the variant record
        familyToViolations.get(trio.FAMILY_ID).add(builder.make());
      }
    }
  }
コード例 #3
0
  /**
   * Compares the variant found on the {@code alleles} track at this locus with a call computed
   * from the reads, and tallies the outcome in a fresh {@code CountedData}.
   *
   * <p>Each counter touched is set to {@code 1L}. When {@code bamIsTruth} is set, the call made
   * from the reads is treated as the truth and the track variant as the call under test;
   * otherwise the variant's GV status (see {@code getGVstatus}) is the truth and the call made
   * from the reads is under test. If a {@code vcfWriter} is configured and the site produced a
   * confident result, the variant is written out, annotated with "ALT" or "REF" when it does not
   * already carry the status attribute.
   *
   * @param tracker meta-data tracker for the locus; may be null
   * @param ref reference context for the locus
   * @param context alignment context supplying the reads at the locus
   * @return the counters for this locus; left untouched when the site is skipped
   */
  public CountedData map(
      RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

    final CountedData counter = new CountedData();

    // RodWalkers can invoke map with a null tracker; there is nothing to count then.
    if (tracker == null) {
      return counter;
    }

    final VariantContext vcComp = tracker.getFirstValue(alleles);
    if (vcComp == null) {
      return counter;
    }

    // todo - not sure I want this, may be misleading to filter extended indel events.
    if (isInsideExtendedIndel(vcComp, ref)) {
      return counter;
    }

    // A site is usable only if it has reads and, when a minimum depth is configured, the pileup
    // reaches that depth.
    final boolean sufficientlyCovered =
        context.hasReads()
            && !(minDepth > 0 && context.getBasePileup().getBases().length < minDepth);
    if (!sufficientlyCovered) {
      counter.nUncovered = 1L;
      final GVstatus uncoveredStatus = getGVstatus(vcComp);
      if (uncoveredStatus == GVstatus.T) {
        counter.nAltNotCalled = 1L;
      } else if (uncoveredStatus == GVstatus.F) {
        counter.nRefNotCalled = 1L;
      } else {
        counter.nNoStatusNotCalled = 1L;
      }
      return counter;
    }

    // Pick the engine matching the variant type and take its first call.
    final VariantCallContext call;
    if (vcComp.isSNP()) {
      call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0);
    } else if (vcComp.isIndel()) {
      call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0);
    } else if (bamIsTruth) {
      // Neither SNP nor indel: treat the site as a SNP so that supposedly monomorphic sites can
      // still be tested against the truth bam.
      call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0);
    } else {
      logger.info(
          "Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles());
      return counter;
    }

    boolean writeVariant = true;

    if (bamIsTruth) {
      if (!call.confidentlyCalled) {
        // The truth call itself is not confident, so the site cannot be scored.
        counter.nNotConfidentCalls = 1L;
        if (printInterestingSites) {
          System.out.println("Truth is not confident at " + call.getChr() + ":" + call.getStart());
        }
        writeVariant = false;
      } else if (call.isVariant()) {
        // Truth is a confident ALT call
        if (vcComp.isVariant()) {
          counter.nAltCalledAlt = 1L;
        } else {
          counter.nAltCalledRef = 1L;
          if (printInterestingSites) {
            System.out.println("Truth=ALT Call=REF at " + call.getChr() + ":" + call.getStart());
          }
        }
      } else {
        // Truth is a confident REF call
        if (vcComp.isVariant()) {
          counter.nRefCalledAlt = 1L;
          if (printInterestingSites) {
            System.out.println("Truth=REF Call=ALT at " + call.getChr() + ":" + call.getStart());
          }
        } else {
          counter.nRefCalledRef = 1L;
        }
      }
    } else {
      // A variant without a GV annotation is tolerated here: it is tallied under the nNoStatus*
      // counters rather than rejected.
      final GVstatus status = getGVstatus(vcComp);
      if (call.isCalledAlt(callConf)) {
        if (status == GVstatus.T) {
          counter.nAltCalledAlt = 1L;
        } else if (status == GVstatus.F) {
          counter.nRefCalledAlt = 1L;
          if (printInterestingSites) {
            System.out.println("Truth=REF Call=ALT at " + call.getChr() + ":" + call.getStart());
          }
        } else {
          counter.nNoStatusCalledAlt = 1L;
        }
      } else if (call.isCalledRef(callConf)) {
        if (status == GVstatus.T) {
          counter.nAltCalledRef = 1L;
          if (printInterestingSites) {
            System.out.println("Truth=ALT Call=REF at " + call.getChr() + ":" + call.getStart());
          }
        } else if (status == GVstatus.F) {
          counter.nRefCalledRef = 1L;
        } else {
          counter.nNoStatusCalledRef = 1L;
        }
      } else {
        // Neither a confident ALT nor a confident REF call at the requested confidence.
        counter.nNotConfidentCalls = 1L;
        if (status == GVstatus.T) {
          counter.nAltNotCalled = 1L;
        } else if (status == GVstatus.F) {
          counter.nRefNotCalled = 1L;
        } else {
          counter.nNoStatusNotCalled = 1L;
        }

        if (printInterestingSites) {
          System.out.println("Truth is not confident at " + call.getChr() + ":" + call.getStart());
        }
        writeVariant = false;
      }
    }

    if (vcfWriter != null && writeVariant) {
      if (vcComp.hasAttribute(GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY)) {
        // Already annotated: pass the record through unchanged.
        vcfWriter.add(vcComp);
      } else {
        // Stamp the record with the outcome of the call before writing it.
        vcfWriter.add(
            new VariantContextBuilder(vcComp)
                .attribute(
                    GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY,
                    call.isCalledAlt(callConf) ? "ALT" : "REF")
                .make());
      }
    }
    return counter;
  }