/**
   * Returns a context identical to this with the REF and ALT alleles reverse complemented.
   *
   * @param vc variant context
   * @return new vc
   */
  public static VariantContext reverseComplement(VariantContext vc) {
    // create a mapping from original allele to reverse complemented allele
    HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
    for (Allele originalAllele : vc.getAlleles()) {
      Allele newAllele;
      if (originalAllele.isNoCall() || originalAllele.isNull()) newAllele = originalAllele;
      else
        newAllele =
            Allele.create(
                BaseUtils.simpleReverseComplement(originalAllele.getBases()),
                originalAllele.isReference());
      alleleMap.put(originalAllele, newAllele);
    }

    // create new Genotype objects
    GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
    for (final Genotype genotype : vc.getGenotypes()) {
      List<Allele> newAlleles = new ArrayList<Allele>();
      for (Allele allele : genotype.getAlleles()) {
        Allele newAllele = alleleMap.get(allele);
        if (newAllele == null) newAllele = Allele.NO_CALL;
        newAlleles.add(newAllele);
      }
      newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
    }

    return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
  }
Example #2
0
  /**
   * Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN,
   * AC, AF).
   *
   * @param vc the VariantContext record to subset
   * @param samples the samples to extract
   * @return the subsetted VariantContext
   */
  private VariantContext subsetRecord(VariantContext vc, Set<String> samples) {
    if (samples == null || samples.isEmpty()) return vc;

    ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
    for (Map.Entry<String, Genotype> genotypePair : vc.getGenotypes().entrySet()) {
      if (samples.contains(genotypePair.getKey())) genotypes.add(genotypePair.getValue());
    }

    VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles());

    // if we have fewer alternate alleles in the selected VC than in the original VC, we need to
    // strip out the GL/PLs (because they are no longer accurate)
    if (vc.getAlleles().size() != sub.getAlleles().size())
      sub = VariantContext.modifyGenotypes(sub, VariantContextUtils.stripPLs(vc.getGenotypes()));

    HashMap<String, Object> attributes = new HashMap<String, Object>(sub.getAttributes());

    int depth = 0;
    for (String sample : sub.getSampleNames()) {
      Genotype g = sub.getGenotype(sample);

      if (g.isNotFiltered() && g.isCalled()) {

        String dp = (String) g.getAttribute("DP");
        if (dp != null
            && !dp.equals(VCFConstants.MISSING_DEPTH_v3)
            && !dp.equals(VCFConstants.MISSING_VALUE_v4)) {
          depth += Integer.valueOf(dp);
        }
      }
    }

    if (KEEP_ORIGINAL_CHR_COUNTS) {
      if (attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY))
        attributes.put("AC_Orig", attributes.get(VCFConstants.ALLELE_COUNT_KEY));
      if (attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY))
        attributes.put("AF_Orig", attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY));
      if (attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY))
        attributes.put("AN_Orig", attributes.get(VCFConstants.ALLELE_NUMBER_KEY));
    }

    VariantContextUtils.calculateChromosomeCounts(sub, attributes, false);
    attributes.put("DP", depth);

    sub = VariantContext.modifyAttributes(sub, attributes);

    return sub;
  }
  @Test(dataProvider = "mergeFiltered")
  public void testMergeFiltered(MergeFilteredTest cfg) {
    final List<String> priority = vcs2priority(cfg.inputs);
    final VariantContext merged =
        VariantContextUtils.simpleMerge(
            genomeLocParser,
            cfg.inputs,
            priority,
            cfg.type,
            VariantContextUtils.GenotypeMergeType.PRIORITIZE,
            true,
            false,
            "set",
            false,
            false);

    // test alleles are equal
    Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());

    // test set field
    Assert.assertEquals(merged.getAttribute("set"), cfg.setExpected);

    // test filter field
    Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters());
  }
  @Test(dataProvider = "mergeAlleles")
  public void testMergeAlleles(MergeAllelesTest cfg) {
    final List<VariantContext> inputs = new ArrayList<VariantContext>();

    int i = 0;
    for (final List<Allele> alleles : cfg.inputs) {
      final String name = "vcf" + ++i;
      inputs.add(makeVC(name, alleles));
    }

    final List<String> priority = vcs2priority(inputs);

    final VariantContext merged =
        VariantContextUtils.simpleMerge(
            genomeLocParser,
            inputs,
            priority,
            VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
            VariantContextUtils.GenotypeMergeType.PRIORITIZE,
            false,
            false,
            "set",
            false,
            false);

    Assert.assertEquals(merged.getAlleles(), cfg.expected);
  }
  protected void printVerboseData(
      String pos,
      VariantContext vc,
      double PofF,
      double phredScaledConfidence,
      final GenotypeLikelihoodsCalculationModel.Model model) {
    Allele refAllele = null, altAllele = null;
    for (Allele allele : vc.getAlleles()) {
      if (allele.isReference()) refAllele = allele;
      else altAllele = allele;
    }

    for (int i = 0; i <= N; i++) {
      StringBuilder AFline = new StringBuilder("AFINFO\t");
      AFline.append(pos);
      AFline.append("\t");
      AFline.append(refAllele);
      AFline.append("\t");
      if (altAllele != null) AFline.append(altAllele);
      else AFline.append("N/A");
      AFline.append("\t");
      AFline.append(i + "/" + N + "\t");
      AFline.append(String.format("%.2f\t", ((float) i) / N));
      AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i]));
      verboseWriter.println(AFline.toString());
    }

    verboseWriter.println("P(f>0) = " + PofF);
    verboseWriter.println("Qscore = " + phredScaledConfidence);
    verboseWriter.println();
  }
Example #6
0
  protected final void printCallInfo(
      final VariantContext vc,
      final double[] log10AlleleFrequencyPriors,
      final long runtimeNano,
      final AFCalcResult result) {
    printCallElement(vc, "type", "ignore", vc.getType());

    int allelei = 0;
    for (final Allele a : vc.getAlleles())
      printCallElement(vc, "allele", allelei++, a.getDisplayString());

    for (final Genotype g : vc.getGenotypes())
      printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString());

    for (int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++)
      printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]);

    printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
    printCallElement(vc, "log10PosteriorOfAFEq0", "ignore", result.getLog10PosteriorOfAFEq0());
    printCallElement(vc, "log10PosteriorOfAFGt0", "ignore", result.getLog10PosteriorOfAFGT0());

    for (final Allele allele : result.getAllelesUsedInGenotyping()) {
      if (allele.isNonReference()) {
        printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele));
        printCallElement(
            vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele));
      }
    }

    callReport.flush();
  }
Example #7
0
 @Override
 public String toString() {
   return String.format(
       "ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s",
       vc.getChr(),
       vc.getStart(),
       vc.getAlleles(),
       vc.getNSamples(),
       originalCall.getLog10PosteriorOfAFGT0(),
       new AutoFormattingTime(runtime / 1e9).toString());
 }
Example #8
0
 /**
  * Copy constructor
  *
  * @param other the VariantContext to copy
  */
 protected VariantContext(VariantContext other) {
   this(
       other.getSource(),
       other.getID(),
       other.getChr(),
       other.getStart(),
       other.getEnd(),
       other.getAlleles(),
       other.getGenotypes(),
       other.getLog10PError(),
       other.getFiltersMaybeNull(),
       other.getAttributes(),
       other.REFERENCE_BASE_FOR_INDEL,
       NO_VALIDATION);
 }
  private VariantCallContext generateEmptyContext(
      RefMetaDataTracker tracker,
      ReferenceContext ref,
      Map<String, AlignmentContext> stratifiedContexts,
      AlignmentContext rawContext) {
    VariantContext vc;
    if (UAC.GenotypingMode
        == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
      VariantContext vcInput =
          UnifiedGenotyperEngine.getVCFromAllelesRod(
              tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles);
      if (vcInput == null) return null;
      vc =
          new VariantContextBuilder(
                  "UG_call",
                  ref.getLocus().getContig(),
                  vcInput.getStart(),
                  vcInput.getEnd(),
                  vcInput.getAlleles())
              .make();
    } else {
      // deal with bad/non-standard reference bases
      if (!Allele.acceptableAlleleBases(new byte[] {ref.getBase()})) return null;

      Set<Allele> alleles = new HashSet<Allele>();
      alleles.add(Allele.create(ref.getBase(), true));
      vc =
          new VariantContextBuilder(
                  "UG_call",
                  ref.getLocus().getContig(),
                  ref.getLocus().getStart(),
                  ref.getLocus().getStart(),
                  alleles)
              .make();
    }

    if (annotationEngine != null) {
      // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
      final ReadBackedPileup pileup = rawContext.getBasePileup();
      stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);

      vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc);
    }

    return new VariantCallContext(vc, false);
  }
  private static AlleleMapper resolveIncompatibleAlleles(
      Allele refAllele, VariantContext vc, Set<Allele> allAlleles) {
    if (refAllele.equals(vc.getReference())) return new AlleleMapper(vc);
    else {
      // we really need to do some work.  The refAllele is the longest reference allele seen at this
      // start site.  So imagine it is:
      //
      // refAllele: ACGTGA
      // myRef:     ACGT
      // myAlt:     -
      //
      // We need to remap all of the alleles in vc to include the extra GA so that
      // myRef => refAllele and myAlt => GA
      //

      Allele myRef = vc.getReference();
      if (refAllele.length() <= myRef.length())
        throw new ReviewedStingException(
            "BUG: myRef=" + myRef + " is longer than refAllele=" + refAllele);
      byte[] extraBases =
          Arrays.copyOfRange(refAllele.getBases(), myRef.length(), refAllele.length());

      //            System.out.printf("Remapping allele at %s%n", vc);
      //            System.out.printf("ref   %s%n", refAllele);
      //            System.out.printf("myref %s%n", myRef );
      //            System.out.printf("extrabases %s%n", new String(extraBases));

      Map<Allele, Allele> map = new HashMap<Allele, Allele>();
      for (Allele a : vc.getAlleles()) {
        if (a.isReference()) map.put(a, refAllele);
        else {
          Allele extended = Allele.extend(a, extraBases);
          for (Allele b : allAlleles) if (extended.equals(b)) extended = b;
          //                    System.out.printf("  Extending %s => %s%n", a, extended);
          map.put(a, extended);
        }
      }

      // debugging
      //            System.out.printf("mapping %s%n", map);

      return new AlleleMapper(map);
    }
  }
  @Test(dataProvider = "mergeGenotypes")
  public void testMergeGenotypes(MergeGenotypesTest cfg) {
    final VariantContext merged =
        VariantContextUtils.simpleMerge(
            genomeLocParser,
            cfg.inputs,
            cfg.priority,
            VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
            VariantContextUtils.GenotypeMergeType.PRIORITIZE,
            true,
            false,
            "set",
            false,
            false);

    // test alleles are equal
    Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());

    // test genotypes
    assertGenotypesAreMostlyEqual(merged.getGenotypes(), cfg.expected.getGenotypes());
  }
Example #12
0
  public void writeBeagleOutput(
      VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) {
    GenomeLoc currentLoc =
        VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), preferredVC);
    StringBuffer beagleOut = new StringBuffer();

    String marker = String.format("%s:%d ", currentLoc.getContig(), currentLoc.getStart());
    beagleOut.append(marker);
    if (markers != null)
      markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
    for (Allele allele : preferredVC.getAlleles()) {
      String bglPrintString;
      if (allele.isNoCall() || allele.isNull()) bglPrintString = "-";
      else bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele

      beagleOut.append(String.format("%s ", bglPrintString));
      if (markers != null) markers.append(bglPrintString).append("\t");
    }
    if (markers != null) markers.append("\n");

    GenotypesContext preferredGenotypes = preferredVC.getGenotypes();
    GenotypesContext otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
    for (String sample : samples) {
      boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE;

      Genotype genotype;
      boolean isValidation;
      // use sample as key into genotypes structure
      if (preferredGenotypes.containsSample(sample)) {
        genotype = preferredGenotypes.get(sample);
        isValidation = isValidationSite;
      } else if (otherGenotypes != null && otherGenotypes.containsSample(sample)) {
        genotype = otherGenotypes.get(sample);
        isValidation = !isValidationSite;
      } else {
        // there is magically no genotype for this sample.
        throw new StingException(
            "Sample "
                + sample
                + " arose with no genotype in variant or validation VCF. This should never happen.");
      }

      /*
       * Use likelihoods if: is validation, prior is negative; or: is not validation, has genotype key
       */
      double[] log10Likelihoods = null;
      if ((isValidation && prior < 0.0) || genotype.hasLikelihoods()) {
        log10Likelihoods = genotype.getLikelihoods().getAsVector();

        // see if we need to randomly mask out genotype in this position.
        if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() <= insertedNoCallRate) {
          // we are masking out this genotype
          log10Likelihoods =
              isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS;
        }

        if (isMaleOnChrX) {
          log10Likelihoods[1] = -255; // todo -- warning this is dangerous for multi-allele case
        }
      }
      /** otherwise, use the prior uniformly */
      else if (!isValidation && genotype.isCalled() && !genotype.hasLikelihoods()) {
        // hack to deal with input VCFs with no genotype likelihoods.  Just assume the called
        // genotype
        // is confident.  This is useful for Hapmap and 1KG release VCFs.
        double AA = (1.0 - prior) / 2.0;
        double AB = (1.0 - prior) / 2.0;
        double BB = (1.0 - prior) / 2.0;

        if (genotype.isHomRef()) {
          AA = prior;
        } else if (genotype.isHet()) {
          AB = prior;
        } else if (genotype.isHomVar()) {
          BB = prior;
        }

        log10Likelihoods = MathUtils.toLog10(new double[] {AA, isMaleOnChrX ? 0.0 : AB, BB});
      } else {
        log10Likelihoods =
            isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS;
      }

      writeSampleLikelihoods(beagleOut, preferredVC, log10Likelihoods);
    }

    beagleWriter.println(beagleOut.toString());
  }
  /**
   * Main entry function to calculate genotypes of a given VC with corresponding GL's
   *
   * @param tracker Tracker
   * @param refContext Reference context
   * @param rawContext Raw context
   * @param stratifiedContexts Stratified alignment contexts
   * @param vc Input VC
   * @param model GL calculation model
   * @param inheritAttributesFromInputVC Output VC will contain attributes inherited from input vc
   * @return VC with assigned genotypes
   */
  public VariantCallContext calculateGenotypes(
      final RefMetaDataTracker tracker,
      final ReferenceContext refContext,
      final AlignmentContext rawContext,
      Map<String, AlignmentContext> stratifiedContexts,
      final VariantContext vc,
      final GenotypeLikelihoodsCalculationModel.Model model,
      final boolean inheritAttributesFromInputVC,
      final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap>
          perReadAlleleLikelihoodMap) {

    boolean limitedContext =
        tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;

    // initialize the data for this thread if that hasn't been done yet
    if (afcm.get() == null) {
      afcm.set(AFCalcFactory.createAFCalc(UAC, N, logger));
    }

    // estimate our confidence in a reference call and return
    if (vc.getNSamples() == 0) {
      if (limitedContext) return null;
      return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES
          ? estimateReferenceConfidence(vc, stratifiedContexts, getTheta(model), false, 1.0)
          : generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
    }

    AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));

    // is the most likely frequency conformation AC=0 for all alternate alleles?
    boolean bestGuessIsRef = true;

    // determine which alternate alleles have AF>0
    final List<Allele> myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
    final List<Integer> alleleCountsofMLE = new ArrayList<Integer>(vc.getAlleles().size());
    myAlleles.add(vc.getReference());
    for (int i = 0; i < AFresult.getAllelesUsedInGenotyping().size(); i++) {
      final Allele alternateAllele = AFresult.getAllelesUsedInGenotyping().get(i);
      if (alternateAllele.isReference()) continue;

      // we are non-ref if the probability of being non-ref > the emit confidence.
      // the emit confidence is phred-scaled, say 30 => 10^-3.
      // the posterior AF > 0 is log10: -5 => 10^-5
      // we are non-ref if 10^-5 < 10^-3 => -5 < -3
      final boolean isNonRef =
          AFresult.isPolymorphic(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING / -10.0);

      // if the most likely AC is not 0, then this is a good alternate allele to use
      if (isNonRef) {
        myAlleles.add(alternateAllele);
        alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
        bestGuessIsRef = false;
      }
      // if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
      else if (UAC.GenotypingMode
          == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
        myAlleles.add(alternateAllele);
        alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
      }
    }

    final double PoFGT0 = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0());

    // note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice
    final double phredScaledConfidence =
        Math.abs(
            !bestGuessIsRef
                    || UAC.GenotypingMode
                        == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE
                            .GENOTYPE_GIVEN_ALLELES
                ? -10 * AFresult.getLog10PosteriorOfAFEq0()
                : -10 * AFresult.getLog10PosteriorOfAFGT0());

    // return a null call if we don't pass the confidence cutoff or the most likely allele frequency
    // is zero
    if (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES
        && !passesEmitThreshold(phredScaledConfidence, bestGuessIsRef)) {
      // technically, at this point our confidence in a reference call isn't accurately estimated
      //  because it didn't take into account samples with no data, so let's get a better estimate
      return limitedContext
          ? null
          : estimateReferenceConfidence(vc, stratifiedContexts, getTheta(model), true, PoFGT0);
    }

    // start constructing the resulting VC
    final GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
    final VariantContextBuilder builder =
        new VariantContextBuilder(
            "UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
    builder.log10PError(phredScaledConfidence / -10.0);
    if (!passesCallThreshold(phredScaledConfidence)) builder.filters(filter);

    // create the genotypes
    final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true, ploidy);
    builder.genotypes(genotypes);

    // print out stats if we have a writer
    if (verboseWriter != null && !limitedContext)
      printVerboseData(refContext.getLocus().toString(), vc, PoFGT0, phredScaledConfidence, model);

    // *** note that calculating strand bias involves overwriting data structures, so we do that
    // last
    final HashMap<String, Object> attributes = new HashMap<String, Object>();

    // inherit attributed from input vc if requested
    if (inheritAttributesFromInputVC) attributes.putAll(vc.getAttributes());
    // if the site was downsampled, record that fact
    if (!limitedContext && rawContext.hasPileupBeenDownsampled())
      attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);

    if (UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED)
      attributes.put(NUMBER_OF_DISCOVERED_ALLELES_KEY, vc.getAlternateAlleles().size());

    // add the MLE AC and AF annotations
    if (alleleCountsofMLE.size() > 0) {
      attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE);
      final int AN = builder.make().getCalledChrCount();
      final ArrayList<Double> MLEfrequencies = new ArrayList<Double>(alleleCountsofMLE.size());
      // the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT
      // is ./. but the exact model may arbitrarily choose an AC>1)
      for (int AC : alleleCountsofMLE) MLEfrequencies.add(Math.min(1.0, (double) AC / (double) AN));
      attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
    }

    if (UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef) {
      // final boolean DEBUG_SLOD = false;

      // the overall lod
      // double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
      double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
      // if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);

      List<Allele> allAllelesToUse = builder.make().getAlleles();

      // the forward lod
      VariantContext vcForward =
          calculateLikelihoods(
              tracker,
              refContext,
              stratifiedContexts,
              AlignmentContextUtils.ReadOrientation.FORWARD,
              allAllelesToUse,
              false,
              model,
              perReadAlleleLikelihoodMap);
      AFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model));
      // double[] normalizedLog10Posteriors =
      // MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
      double forwardLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0();
      double forwardLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
      // if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ",
      // forwardLog10PofF=" + forwardLog10PofF);

      // the reverse lod
      VariantContext vcReverse =
          calculateLikelihoods(
              tracker,
              refContext,
              stratifiedContexts,
              AlignmentContextUtils.ReadOrientation.REVERSE,
              allAllelesToUse,
              false,
              model,
              perReadAlleleLikelihoodMap);
      AFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model));
      // normalizedLog10Posteriors =
      // MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
      double reverseLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0();
      double reverseLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
      // if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ",
      // reverseLog10PofF=" + reverseLog10PofF);

      double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
      double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
      // if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" +
      // reverseLod);

      // strand score is max bias between forward and reverse strands
      double strandScore = Math.max(forwardLod, reverseLod);
      // rescale by a factor of 10
      strandScore *= 10.0;
      // logger.debug(String.format("SLOD=%f", strandScore));

      if (!Double.isNaN(strandScore)) attributes.put("SB", strandScore);
    }

    // finish constructing the resulting VC
    builder.attributes(attributes);
    VariantContext vcCall = builder.make();

    // if we are subsetting alleles (either because there were too many or because some were not
    // polymorphic)
    // then we may need to trim the alleles (because the original VariantContext may have had to pad
    // at the end).
    if (myAlleles.size() != vc.getAlleles().size()
        && !limitedContext) // limitedContext callers need to handle allele trimming on their own to
                            // keep their perReadAlleleLikelihoodMap alleles in sync
    vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);

    if (annotationEngine != null
        && !limitedContext) { // limitedContext callers need to handle annotations on their own by
                              // calling their own annotationEngine
      // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
      final ReadBackedPileup pileup = rawContext.getBasePileup();
      stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);

      vcCall =
          annotationEngine.annotateContext(
              tracker, refContext, stratifiedContexts, vcCall, perReadAlleleLikelihoodMap);
    }

    return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PoFGT0));
  }
 public Collection<Allele> values() {
   return map != null ? map.values() : vc.getAlleles();
 }
  public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
    // see if we need to trim common reference base from all alleles
    boolean trimVC;

    // We need to trim common reference base from all alleles in all genotypes if a ref base is
    // common to all alleles
    Allele refAllele = inputVC.getReference();
    if (!inputVC.isVariant()) trimVC = false;
    else if (refAllele.isNull()) trimVC = false;
    else {
      trimVC =
          (AbstractVCFCodec.computeForwardClipping(
                  new ArrayList<Allele>(inputVC.getAlternateAlleles()),
                  inputVC.getReference().getDisplayString())
              > 0);
    }

    // nothing to do if we don't need to trim bases
    if (trimVC) {
      List<Allele> alleles = new ArrayList<Allele>();
      GenotypesContext genotypes = GenotypesContext.create();

      // set the reference base for indels in the attributes
      Map<String, Object> attributes = new TreeMap<String, Object>(inputVC.getAttributes());

      Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();

      for (Allele a : inputVC.getAlleles()) {
        if (a.isSymbolic()) {
          alleles.add(a);
          originalToTrimmedAlleleMap.put(a, a);
        } else {
          // get bases for current allele and create a new one with trimmed bases
          byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
          Allele trimmedAllele = Allele.create(newBases, a.isReference());
          alleles.add(trimmedAllele);
          originalToTrimmedAlleleMap.put(a, trimmedAllele);
        }
      }

      // detect case where we're trimming bases but resulting vc doesn't have any null allele. In
      // that case, we keep original representation
      // example: mixed records such as {TA*,TGA,TG}
      boolean hasNullAlleles = false;

      for (Allele a : originalToTrimmedAlleleMap.values()) {
        if (a.isNull()) hasNullAlleles = true;
        if (a.isReference()) refAllele = a;
      }

      if (!hasNullAlleles) return inputVC;
      // now we can recreate new genotypes with trimmed alleles
      for (final Genotype genotype : inputVC.getGenotypes()) {

        List<Allele> originalAlleles = genotype.getAlleles();
        List<Allele> trimmedAlleles = new ArrayList<Allele>();
        for (Allele a : originalAlleles) {
          if (a.isCalled()) trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
          else trimmedAlleles.add(Allele.NO_CALL);
        }
        genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
      }

      final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
      return builder
          .alleles(alleles)
          .genotypes(genotypes)
          .attributes(attributes)
          .referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0]))
          .make();
    }

    return inputVC;
  }
  public static VariantContext createVariantContextWithPaddedAlleles(
      VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
    // see if we need to pad common reference base from all alleles
    boolean padVC;

    // We need to pad a VC with a common base if the length of the reference allele is less than the
    // length of the VariantContext.
    // This happens because the position of e.g. an indel is always one before the actual event (as
    // per VCF convention).
    long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1;
    if (inputVC.hasSymbolicAlleles()) padVC = true;
    else if (inputVC.getReference().length() == locLength) padVC = false;
    else if (inputVC.getReference().length() == locLength - 1) padVC = true;
    else
      throw new IllegalArgumentException(
          "Badly formed variant context at location "
              + String.valueOf(inputVC.getStart())
              + " in contig "
              + inputVC.getChr()
              + ". Reference length must be at most one base shorter than location size");

    // nothing to do if we don't need to pad bases
    if (padVC) {
      if (!inputVC.hasReferenceBaseForIndel())
        throw new ReviewedStingException(
            "Badly formed variant context at location "
                + inputVC.getChr()
                + ":"
                + inputVC.getStart()
                + "; no padded reference base is available.");

      Byte refByte = inputVC.getReferenceBaseForIndel();

      List<Allele> alleles = new ArrayList<Allele>();

      for (Allele a : inputVC.getAlleles()) {
        // get bases for current allele and create a new one with trimmed bases
        if (a.isSymbolic()) {
          alleles.add(a);
        } else {
          String newBases;
          if (refBaseShouldBeAppliedToEndOfAlleles)
            newBases = a.getBaseString() + new String(new byte[] {refByte});
          else newBases = new String(new byte[] {refByte}) + a.getBaseString();
          alleles.add(Allele.create(newBases, a.isReference()));
        }
      }

      // now we can recreate new genotypes with trimmed alleles
      GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
      for (final Genotype g : inputVC.getGenotypes()) {
        List<Allele> inAlleles = g.getAlleles();
        List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
        for (Allele a : inAlleles) {
          if (a.isCalled()) {
            if (a.isSymbolic()) {
              newGenotypeAlleles.add(a);
            } else {
              String newBases;
              if (refBaseShouldBeAppliedToEndOfAlleles)
                newBases = a.getBaseString() + new String(new byte[] {refByte});
              else newBases = new String(new byte[] {refByte}) + a.getBaseString();
              newGenotypeAlleles.add(Allele.create(newBases, a.isReference()));
            }
          } else {
            // add no-call allele
            newGenotypeAlleles.add(Allele.NO_CALL);
          }
        }
        genotypes.add(
            new Genotype(
                g.getSampleName(),
                newGenotypeAlleles,
                g.getLog10PError(),
                g.getFilters(),
                g.getAttributes(),
                g.isPhased()));
      }

      return new VariantContextBuilder(inputVC).alleles(alleles).genotypes(genotypes).make();
    } else return inputVC;
  }
  public Allele getLikelihoods(
      RefMetaDataTracker tracker,
      ReferenceContext ref,
      Map<String, AlignmentContext> contexts,
      AlignmentContextUtils.ReadOrientation contextType,
      GenotypePriors priors,
      Map<String, MultiallelicGenotypeLikelihoods> GLs,
      Allele alternateAlleleToUse,
      boolean useBAQedPileup) {

    if (tracker == null) return null;

    GenomeLoc loc = ref.getLocus();
    Allele refAllele, altAllele;
    VariantContext vc = null;

    if (!ref.getLocus().equals(lastSiteVisited)) {
      // starting a new site: clear allele list
      alleleList.clear();
      lastSiteVisited = ref.getLocus();
      indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
      haplotypeMap.clear();

      if (getAlleleListFromVCF) {
        for (final VariantContext vc_input : tracker.getValues(UAC.alleles, loc)) {
          if (vc_input != null
              && allowableTypes.contains(vc_input.getType())
              && ref.getLocus().getStart() == vc_input.getStart()) {
            vc = vc_input;
            break;
          }
        }
        // ignore places where we don't have a variant
        if (vc == null) return null;

        alleleList.clear();
        if (ignoreSNPAllelesWhenGenotypingIndels) {
          // if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore
          // it and don't genotype it
          for (Allele a : vc.getAlleles())
            if (a.isNonReference() && a.getBases().length == vc.getReference().getBases().length)
              continue;
            else alleleList.add(a);

        } else {
          for (Allele a : vc.getAlleles()) alleleList.add(a);
        }

      } else {
        alleleList = computeConsensusAlleles(ref, contexts, contextType);
        if (alleleList.isEmpty()) return null;
      }
    }
    // protect against having an indel too close to the edge of a contig
    if (loc.getStart() <= HAPLOTYPE_SIZE) return null;

    // check if there is enough reference window to create haplotypes (can be an issue at end of
    // contigs)
    if (ref.getWindow().getStop() < loc.getStop() + HAPLOTYPE_SIZE) return null;
    if (!(priors instanceof DiploidIndelGenotypePriors))
      throw new StingException(
          "Only diploid-based Indel priors are supported in the DINDEL GL model");

    if (alleleList.isEmpty()) return null;

    refAllele = alleleList.get(0);
    altAllele = alleleList.get(1);

    // look for alt allele that has biggest length distance to ref allele
    int maxLenDiff = 0;
    for (Allele a : alleleList) {
      if (a.isNonReference()) {
        int lenDiff = Math.abs(a.getBaseString().length() - refAllele.getBaseString().length());
        if (lenDiff > maxLenDiff) {
          maxLenDiff = lenDiff;
          altAllele = a;
        }
      }
    }

    final int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
    final int hsize = (int) ref.getWindow().size() - Math.abs(eventLength) - 1;
    final int numPrefBases = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;

    haplotypeMap =
        Haplotype.makeHaplotypeListFromAlleles(
            alleleList, loc.getStart(), ref, hsize, numPrefBases);

    // For each sample, get genotype likelihoods based on pileup
    // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with
    // them.
    // initialize the GenotypeLikelihoods
    GLs.clear();

    for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
      AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);

      ReadBackedPileup pileup = null;
      if (context.hasExtendedEventPileup()) pileup = context.getExtendedEventPileup();
      else if (context.hasBasePileup()) pileup = context.getBasePileup();

      if (pileup != null) {
        final double[] genotypeLikelihoods =
            pairModel.computeReadHaplotypeLikelihoods(
                pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());

        GLs.put(
            sample.getKey(),
            new MultiallelicGenotypeLikelihoods(
                sample.getKey(), alleleList, genotypeLikelihoods, getFilteredDepth(pileup)));

        if (DEBUG) {
          System.out.format("Sample:%s Alleles:%s GL:", sample.getKey(), alleleList.toString());
          for (int k = 0; k < genotypeLikelihoods.length; k++)
            System.out.format("%1.4f ", genotypeLikelihoods[k]);
          System.out.println();
        }
      }
    }

    return refAllele;
  }
Example #18
0
  /**
   * add a record to the file
   *
   * @param vc the Variant Context object
   * @param refBase the ref base used for indels
   * @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE
   *     EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER
   *     THE EVENT INSTEAD)
   */
  public void add(VariantContext vc, byte refBase, boolean refBaseShouldBeAppliedToEndOfAlleles) {
    if (mHeader == null)
      throw new IllegalStateException(
          "The VCF Header must be written before records can be added: " + locationString());

    if (doNotWriteGenotypes) vc = VariantContext.modifyGenotypes(vc, null);

    try {
      vc =
          VariantContext.createVariantContextWithPaddedAlleles(
              vc, refBase, refBaseShouldBeAppliedToEndOfAlleles);

      // if we are doing on the fly indexing, add the record ***before*** we write any bytes
      if (indexer != null) indexer.addFeature(vc, positionalStream.getPosition());

      Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
      alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup

      // CHROM
      mWriter.write(vc.getChr());
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // POS
      mWriter.write(String.valueOf(vc.getStart()));
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // ID
      String ID = vc.hasID() ? vc.getID() : VCFConstants.EMPTY_ID_FIELD;
      mWriter.write(ID);
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // REF
      alleleMap.put(vc.getReference(), "0");
      String refString = vc.getReference().getDisplayString();
      mWriter.write(refString);
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // ALT
      if (vc.isVariant()) {
        Allele altAllele = vc.getAlternateAllele(0);
        alleleMap.put(altAllele, "1");
        String alt = altAllele.getDisplayString();
        mWriter.write(alt);

        for (int i = 1; i < vc.getAlternateAlleles().size(); i++) {
          altAllele = vc.getAlternateAllele(i);
          alleleMap.put(altAllele, String.valueOf(i + 1));
          alt = altAllele.getDisplayString();
          mWriter.write(",");
          mWriter.write(alt);
        }
      } else {
        mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD);
      }
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // QUAL
      if (!vc.hasNegLog10PError()) mWriter.write(VCFConstants.MISSING_VALUE_v4);
      else mWriter.write(getQualValue(vc.getPhredScaledQual()));
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // FILTER
      String filters =
          vc.isFiltered()
              ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters()))
              : (filtersWereAppliedToContext || vc.filtersWereApplied()
                  ? VCFConstants.PASSES_FILTERS_v4
                  : VCFConstants.UNFILTERED);
      mWriter.write(filters);
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // INFO
      Map<String, String> infoFields = new TreeMap<String, String>();
      for (Map.Entry<String, Object> field : vc.getAttributes().entrySet()) {
        String key = field.getKey();
        if (key.equals(VariantContext.ID_KEY)
            || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)
            || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY)
            || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY)) continue;

        String outputValue = formatVCFField(field.getValue());
        if (outputValue != null) infoFields.put(key, outputValue);
      }
      writeInfoString(infoFields);

      // FORMAT
      if (vc.hasAttribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY)) {
        mWriter.write(VCFConstants.FIELD_SEPARATOR);
        mWriter.write(vc.getAttributeAsString(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, ""));
      } else {
        List<String> genotypeAttributeKeys = new ArrayList<String>();
        if (vc.hasGenotypes()) {
          genotypeAttributeKeys.addAll(calcVCFGenotypeKeys(vc));
        } else if (mHeader.hasGenotypingData()) {
          // this needs to be done in case all samples are no-calls
          genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
        }

        if (genotypeAttributeKeys.size() > 0) {
          String genotypeFormatString =
              ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys);
          mWriter.write(VCFConstants.FIELD_SEPARATOR);
          mWriter.write(genotypeFormatString);

          addGenotypeData(vc, alleleMap, genotypeAttributeKeys);
        }
      }

      mWriter.write("\n");
      mWriter.flush(); // necessary so that writing to an output stream will work
    } catch (IOException e) {
      throw new RuntimeException("Unable to write the VCF object to " + locationString());
    }
  }