Example #1
0
  /**
   * Dumps a structured record of one genotyping call to the call report.
   *
   * <p>Emits one element per field: the variant type, every allele, each sample's PL string,
   * each log10 allele-frequency prior, the runtime in nanoseconds, and the posterior summaries
   * (plus per-alt-allele MLE counts and pNonRef values) from the AF calculation result.
   * Flushes the report before returning.
   *
   * @param vc the variant context being reported
   * @param log10AlleleFrequencyPriors per-frequency log10 priors used in the calculation
   * @param runtimeNano wall-clock runtime of the calculation, in nanoseconds
   * @param result the allele-frequency calculation result to report
   */
  protected final void printCallInfo(
      final VariantContext vc,
      final double[] log10AlleleFrequencyPriors,
      final long runtimeNano,
      final AFCalcResult result) {
    printCallElement(vc, "type", "ignore", vc.getType());

    int alleleIndex = 0;
    for (final Allele allele : vc.getAlleles()) {
      printCallElement(vc, "allele", alleleIndex, allele.getDisplayString());
      alleleIndex++;
    }

    for (final Genotype genotype : vc.getGenotypes()) {
      printCallElement(vc, "PL", genotype.getSampleName(), genotype.getLikelihoodsString());
    }

    for (int i = 0; i < log10AlleleFrequencyPriors.length; i++) {
      printCallElement(vc, "priorI", i, log10AlleleFrequencyPriors[i]);
    }

    printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
    printCallElement(vc, "log10PosteriorOfAFEq0", "ignore", result.getLog10PosteriorOfAFEq0());
    printCallElement(vc, "log10PosteriorOfAFGt0", "ignore", result.getLog10PosteriorOfAFGT0());

    for (final Allele alt : result.getAllelesUsedInGenotyping()) {
      if (!alt.isNonReference()) {
        continue; // only alternate alleles carry MLE / pNonRef entries
      }
      printCallElement(vc, "MLE", alt, result.getAlleleCountAtMLE(alt));
      printCallElement(
          vc, "pNonRefByAllele", alt, result.getLog10PosteriorOfAFGt0ForAllele(alt));
    }

    callReport.flush();
  }
  /**
   * Classifies how well a comp record matches an eval record.
   *
   * <p>Records of different variant types never match. Otherwise the match is STRICT when both
   * records lack an alternate allele, or when the first alternate alleles and the reference
   * alleles agree. A same-type record with differing alleles is LENIENT, unless strict allele
   * matching was requested, in which case it is NO_MATCH.
   *
   * @param eval the evaluation variant context (non-null)
   * @param comp the comparison variant context (non-null)
   * @param requireStrictAlleleMatch if true, non-strict allele agreement counts as NO_MATCH
   * @return the match classification for this eval/comp pair
   */
  @Requires({"eval != null", "comp != null"})
  private EvalCompMatchType doEvalAndCompMatch(
      final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
    // Different variant types can never match.
    if (comp.getType() != eval.getType()) return EvalCompMatchType.NO_MATCH;

    // First alternate allele of each record, or null when the record has none.
    final Allele evalAlt =
        eval.getAlternateAlleles().isEmpty() ? null : eval.getAlternateAllele(0);
    final Allele compAlt =
        comp.getAlternateAlleles().isEmpty() ? null : comp.getAlternateAllele(0);

    final boolean bothMissingAlt = evalAlt == null && compAlt == null;
    final boolean altAndRefAgree =
        evalAlt != null
            && evalAlt.equals(compAlt)
            && eval.getReference().equals(comp.getReference());

    if (bothMissingAlt || altAndRefAgree) return EvalCompMatchType.STRICT;

    // Same type but different alleles: lenient match unless strict matching was requested.
    return requireStrictAlleleMatch ? EvalCompMatchType.NO_MATCH : EvalCompMatchType.LENIENT;
  }
Example #3
0
 /**
  * Maps a VariantContext onto this walker's coarse Type classification.
  *
  * <p>SNPs map to SNP. Indels map to INDEL, unless any indel event exceeds MAX_INDEL_LENGTH,
  * in which case the record is treated as a CNV. Symbolic records map to CNV. Any other
  * variant type yields null.
  *
  * @param vc the variant record to classify
  * @return the coarse type, or null for unexpected variant context types
  */
 private Type getType(VariantContext vc) {
   switch (vc.getType()) {
     case SNP:
       return Type.SNP;
     case INDEL:
       // Oversized indel events are classified as copy-number variants instead.
       for (int indelLength : vc.getIndelLengths()) {
         if (Math.abs(indelLength) > MAX_INDEL_LENGTH) return Type.CNV;
       }
       return Type.INDEL;
     case SYMBOLIC:
       return Type.CNV;
     default:
       // NOTE(review): a UserException.BadInput throw was deliberately disabled here;
       // unexpected types are silently mapped to null. Confirm callers tolerate null.
       return null;
   }
 }
  /**
   * Partitions variant contexts by their {@link VariantContext.Type}, merging records whose
   * alleles are a subset of a differently-typed record into that record's type list.
   *
   * <p>For each incoming vc, previously binned records of other types are examined:
   * a) if some otherVC's alleles are a subset of vc's, otherVC is moved into vc's type list;
   * b) if vc's alleles are a subset of some otherVC's, vc joins otherVC's type list instead
   *    of its own;
   * c) otherwise vc is simply added to its own type's list.
   *
   * @param VCs the variant contexts to partition
   * @return a map from variant type to the contexts assigned to that type; never contains
   *     empty lists
   */
  public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(
      Collection<VariantContext> VCs) {
    HashMap<VariantContext.Type, List<VariantContext>> mappedVCs =
        new HashMap<VariantContext.Type, List<VariantContext>>();
    for (VariantContext vc : VCs) {
      boolean addToOwnList = true;
      for (VariantContext.Type type : VariantContext.Type.values()) {
        if (type.equals(vc.getType())) continue;

        if (!mappedVCs.containsKey(type)) continue;

        List<VariantContext> vcList = mappedVCs.get(type);
        for (int k = 0; k < vcList.size(); k++) {
          VariantContext otherVC = vcList.get(k);
          if (allelesAreSubset(otherVC, vc)) {
            // otherVC's alleles are a subset of vc's: move otherVC into vc's type list.
            vcList.remove(k);
            // BUGFIX: remove the emptied entry by its KEY. The original code called
            // mappedVCs.remove(vcList), passing the map's value to the key-based
            // Map.remove(Object), which never matched and left empty lists behind.
            if (vcList.isEmpty()) mappedVCs.remove(type);
            if (!mappedVCs.containsKey(vc.getType()))
              mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
            mappedVCs.get(vc.getType()).add(otherVC);
            break;
          } else if (allelesAreSubset(vc, otherVC)) {
            // vc's alleles are a subset of otherVC's: bin vc with otherVC's type instead
            // of its own.
            mappedVCs.get(type).add(vc);
            addToOwnList = false;
            break;
          }
        }
      }
      if (addToOwnList) {
        if (!mappedVCs.containsKey(vc.getType()))
          mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
        mappedVCs.get(vc.getType()).add(vc);
      }
    }

    return mappedVCs;
  }
  /**
   * Computes per-sample indel genotype likelihoods at the current site and fills {@code GLs}.
   *
   * <p>On the first visit to a locus the candidate allele list is built either from a VCF of
   * known alleles (when {@code getAlleleListFromVCF} is set) or from the read pileups via
   * {@code computeConsensusAlleles}; the list is cached in the {@code alleleList} field for
   * later calls at the same locus. Haplotypes are then constructed around the event and each
   * sample's pileup is scored against them with the pair model.
   *
   * @param tracker reference-ordered data at this locus; the site is skipped when null
   * @param ref reference bases and the reference window around the locus
   * @param contexts per-sample alignment contexts
   * @param contextType read-orientation stratification applied to each sample's context
   * @param priors genotype priors; must be a DiploidIndelGenotypePriors instance
   * @param GLs output map; cleared and refilled with per-sample multiallelic likelihoods
   * @param alternateAlleleToUse not referenced in this method body — TODO confirm callers
   *     expect it to be ignored here
   * @param useBAQedPileup not referenced in this method body — TODO confirm
   * @return the reference allele on success, or null when the site cannot be genotyped
   */
  public Allele getLikelihoods(
      RefMetaDataTracker tracker,
      ReferenceContext ref,
      Map<String, AlignmentContext> contexts,
      AlignmentContextUtils.ReadOrientation contextType,
      GenotypePriors priors,
      Map<String, MultiallelicGenotypeLikelihoods> GLs,
      Allele alternateAlleleToUse,
      boolean useBAQedPileup) {

    // No reference-ordered data: nothing to genotype here.
    if (tracker == null) return null;

    GenomeLoc loc = ref.getLocus();
    Allele refAllele, altAllele;
    VariantContext vc = null;

    // Rebuild the per-site state only when we move to a new locus; otherwise the cached
    // alleleList from the previous call at this site is reused.
    if (!ref.getLocus().equals(lastSiteVisited)) {
      // starting a new site: clear allele list
      alleleList.clear();
      lastSiteVisited = ref.getLocus();
      indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
      haplotypeMap.clear();

      if (getAlleleListFromVCF) {
        // Take the first input record of an allowable type that starts exactly at this locus.
        for (final VariantContext vc_input : tracker.getValues(UAC.alleles, loc)) {
          if (vc_input != null
              && allowableTypes.contains(vc_input.getType())
              && ref.getLocus().getStart() == vc_input.getStart()) {
            vc = vc_input;
            break;
          }
        }
        // ignore places where we don't have a variant
        if (vc == null) return null;

        alleleList.clear();
        if (ignoreSNPAllelesWhenGenotypingIndels) {
          // if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore
          // it and don't genotype it
          for (Allele a : vc.getAlleles())
            if (a.isNonReference() && a.getBases().length == vc.getReference().getBases().length)
              continue;
            else alleleList.add(a);

        } else {
          for (Allele a : vc.getAlleles()) alleleList.add(a);
        }

      } else {
        // Derive candidate alleles directly from the pileups at this site.
        alleleList = computeConsensusAlleles(ref, contexts, contextType);
        if (alleleList.isEmpty()) return null;
      }
    }
    // protect against having an indel too close to the edge of a contig
    if (loc.getStart() <= HAPLOTYPE_SIZE) return null;

    // check if there is enough reference window to create haplotypes (can be an issue at end of
    // contigs)
    if (ref.getWindow().getStop() < loc.getStop() + HAPLOTYPE_SIZE) return null;
    if (!(priors instanceof DiploidIndelGenotypePriors))
      throw new StingException(
          "Only diploid-based Indel priors are supported in the DINDEL GL model");

    if (alleleList.isEmpty()) return null;

    // NOTE(review): assumes alleleList has at least two entries (ref at index 0, an alt at
    // index 1); a single-allele list would throw IndexOutOfBoundsException — confirm the
    // allele-list builders above guarantee this.
    refAllele = alleleList.get(0);
    altAllele = alleleList.get(1);

    // look for alt allele that has biggest length distance to ref allele
    int maxLenDiff = 0;
    for (Allele a : alleleList) {
      if (a.isNonReference()) {
        int lenDiff = Math.abs(a.getBaseString().length() - refAllele.getBaseString().length());
        if (lenDiff > maxLenDiff) {
          maxLenDiff = lenDiff;
          altAllele = a;
        }
      }
    }

    // Event length is signed: positive for insertions, negative for deletions.
    final int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
    final int hsize = (int) ref.getWindow().size() - Math.abs(eventLength) - 1;
    final int numPrefBases = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;

    haplotypeMap =
        Haplotype.makeHaplotypeListFromAlleles(
            alleleList, loc.getStart(), ref, hsize, numPrefBases);

    // For each sample, get genotype likelihoods based on pileup
    // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with
    // them.
    // initialize the GenotypeLikelihoods
    GLs.clear();

    for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
      AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);

      // Prefer the extended-event pileup when present; fall back to the base pileup.
      ReadBackedPileup pileup = null;
      if (context.hasExtendedEventPileup()) pileup = context.getExtendedEventPileup();
      else if (context.hasBasePileup()) pileup = context.getBasePileup();

      if (pileup != null) {
        final double[] genotypeLikelihoods =
            pairModel.computeReadHaplotypeLikelihoods(
                pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());

        GLs.put(
            sample.getKey(),
            new MultiallelicGenotypeLikelihoods(
                sample.getKey(), alleleList, genotypeLikelihoods, getFilteredDepth(pileup)));

        if (DEBUG) {
          System.out.format("Sample:%s Alleles:%s GL:", sample.getKey(), alleleList.toString());
          for (int k = 0; k < genotypeLikelihoods.length; k++)
            System.out.format("%1.4f ", genotypeLikelihoods[k]);
          System.out.println();
        }
      }
    }

    return refAllele;
  }
Example #6
0
  /**
   * Updates site- and genotype-level summary counters for one called variant context.
   *
   * <p>Tallies the site by type (SNP/MNP/indel/mixed/symbolic, plus singleton counts), then
   * walks the genotypes counting no-calls, hom-refs, hets, and hom-vars — and, when an
   * ANCESTRALALLELE annotation is present, derived homozygotes.
   *
   * @param vc1 the called variant context to tally
   * @param tracker not referenced in this method body
   * @param ref not referenced in this method body
   * @param context not referenced in this method body
   * @return always null; this updater captures no interesting sites
   * @throws ReviewedStingException on an unexpected variant or genotype type
   */
  public String update1(
      VariantContext vc1,
      RefMetaDataTracker tracker,
      ReferenceContext ref,
      AlignmentContext context) {
    nCalledLoci++;

    // Note from Eric:
    // This is really not correct.  What we really want here is a polymorphic vs. monomorphic
    // count (i.e. on the Genotypes). So in order to maintain consistency with the previous
    // implementation (and the intention of the original author), I've added in a proxy check
    // for monomorphic status here.
    // Protects against the case when vc has only no-calls - can happen if we stratify by
    // sample and the sample has a single no-call.
    if (vc1.isMonomorphicInSamples()) {
      nRefLoci++;
    } else {
      switch (vc1.getType()) {
        case NO_VARIATION:
          // shouldn't get here
          break;
        case SNP:
          nVariantLoci++;
          nSNPs++;
          if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
          break;
        case MNP:
          nVariantLoci++;
          nMNPs++;
          if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
          break;
        case INDEL:
          nVariantLoci++;
          if (vc1.isSimpleInsertion()) nInsertions++;
          else if (vc1.isSimpleDeletion()) nDeletions++;
          else nComplex++;
          break;
        case MIXED:
          nVariantLoci++;
          nMixed++;
          break;
        case SYMBOLIC:
          nSymbolic++;
          break;
        default:
          throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType());
      }
    }

    String refStr = vc1.getReference().getBaseString().toUpperCase();

    String aaStr =
        vc1.hasAttribute("ANCESTRALALLELE")
            ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase()
            : null;

    // ref  aa  alt  class
    // A    C   A    der homozygote
    // A    C   C    anc homozygote

    // A    A   A    ref homozygote
    // A    A   C
    // A    C   A
    // A    C   C

    // FIX: altStr depends only on vc1, so compute it once instead of recomputing it for
    // every genotype inside the loop (it was loop-invariant).
    final String altStr =
        vc1.getAlternateAlleles().size() > 0
            ? vc1.getAlternateAllele(0).getBaseString().toUpperCase()
            : null;

    for (final Genotype g : vc1.getGenotypes()) {
      switch (g.getType()) {
        case NO_CALL:
          nNoCalls++;
          break;
        case HOM_REF:
          nHomRef++;

          // Hom-ref is a derived homozygote when the reference allele is not ancestral.
          if (aaStr != null && altStr != null && !refStr.equalsIgnoreCase(aaStr)) {
            nHomDerived++;
          }

          break;
        case HET:
          nHets++;
          break;
        case HOM_VAR:
          nHomVar++;

          // Hom-var is a derived homozygote when the alt allele is not ancestral.
          if (aaStr != null && altStr != null && !altStr.equalsIgnoreCase(aaStr)) {
            nHomDerived++;
          }

          break;
        case MIXED:
          break;
        default:
          throw new ReviewedStingException("BUG: Unexpected genotype type: " + g);
      }
    }

    return null; // we don't capture any interesting sites
  }
Example #7
0
  /**
   * Subset VC record if necessary and emit the modified record (provided it satisfies criteria for
   * printing)
   *
   * @param tracker the ROD tracker
   * @param ref reference information
   * @param context alignment info
   * @return 1 if the record was printed to the output file, 0 if otherwise
   */
  @Override
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null) return 0;

    Collection<VariantContext> vcs =
        tracker.getValues(variantCollection.variants, context.getLocation());

    if (vcs == null || vcs.size() == 0) {
      return 0;
    }

    for (VariantContext vc : vcs) {
      if (MENDELIAN_VIOLATIONS) {
        boolean foundMV = false;
        for (MendelianViolation mv : mvSet) {
          if (mv.isViolation(vc)) {
            foundMV = true;
            if (outMVFile != null)
              outMVFileStream.format(
                  "MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, "
                      + "childG=%s childGL=%s\n",
                  vc.getChr(),
                  vc.getStart(),
                  vc.getReference().getDisplayString(),
                  vc.getAlternateAllele(0).getDisplayString(),
                  vc.getChromosomeCount(vc.getAlternateAllele(0)),
                  mv.getSampleMom(),
                  mv.getSampleDad(),
                  mv.getSampleChild(),
                  vc.getGenotype(mv.getSampleMom()).toBriefString(),
                  vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
                  vc.getGenotype(mv.getSampleDad()).toBriefString(),
                  // BUGFIX: the dadGL column previously printed the MOM's likelihoods
                  // (copy-paste of getSampleMom()); it now reports the dad's.
                  vc.getGenotype(mv.getSampleDad()).getLikelihoods().getAsString(),
                  vc.getGenotype(mv.getSampleChild()).toBriefString(),
                  vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString());
          }
        }

        // NOTE(review): break abandons ALL remaining vcs at this locus when one has no
        // violation; a per-record `continue` may have been intended — confirm.
        if (!foundMV) break;
      }
      if (DISCORDANCE_ONLY) {
        Collection<VariantContext> compVCs =
            tracker.getValues(discordanceTrack, context.getLocation());
        if (!isDiscordant(vc, compVCs)) return 0;
      }
      if (CONCORDANCE_ONLY) {
        Collection<VariantContext> compVCs =
            tracker.getValues(concordanceTrack, context.getLocation());
        if (!isConcordant(vc, compVCs)) return 0;
      }

      // Apply the allele-count restriction requested on the command line.
      if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic())
        continue;

      if (alleleRestriction.equals(NumberAlleleRestriction.MULTIALLELIC) && vc.isBiallelic())
        continue;

      if (!selectedTypes.contains(vc.getType())) continue;

      // Subset to the selected samples, then apply variant/filter/JEXL criteria before emitting.
      VariantContext sub = subsetRecord(vc, samples);
      if ((sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS)
          && (!sub.isFiltered() || !EXCLUDE_FILTERED)) {
        for (VariantContextUtils.JexlVCMatchExp jexl : jexls) {
          if (!VariantContextUtils.match(sub, jexl)) {
            return 0;
          }
        }
        if (SELECT_RANDOM_NUMBER) {
          randomlyAddVariant(++variantNumber, sub, ref.getBase());
        } else if (!SELECT_RANDOM_FRACTION
            || (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
          vcfWriter.add(sub);
        }
      }
    }

    return 1;
  }