private static void addGenotypeTests(final VariantContext site, Genotype... genotypes) {
    // for each sites VC, we are going to add create two root genotypes.
    // The first is the primary, and will be added to each new test
    // The second is variable.  In some tests it's absent (testing 1 genotype), in others it is
    // duplicated
    // 1 once, 10, 100, or 1000 times to test scaling

    final VariantContextBuilder builder = new VariantContextBuilder(site);

    // add a single context
    builder.genotypes(genotypes[0]);
    add(builder);

    if (genotypes.length > 1) {
      // add all
      add(builder.genotypes(Arrays.asList(genotypes)));

      // add all with the last replicated 10x and 100x times
      for (int nCopiesOfLast : Arrays.asList(10, 100, 1000)) {
        final GenotypesContext gc = new GenotypesContext();
        final Genotype last = genotypes[genotypes.length - 1];
        for (int i = 0; i < genotypes.length - 1; i++) gc.add(genotypes[i]);
        for (int i = 0; i < nCopiesOfLast; i++)
          gc.add(new GenotypeBuilder(last).name("copy" + i).make());
        add(builder.genotypes(gc));
      }
    }
  }
 /**
  * Update the attributes of the attributes map in the VariantContextBuilder to reflect the proper
  * chromosome-based VCF tags based on the current VC produced by builder.make()
  *
  * @param builder the VariantContextBuilder we are updating
  * @param removeStaleValues should we remove stale values from the mapping?
  */
 public static void calculateChromosomeCounts(
     VariantContextBuilder builder, boolean removeStaleValues) {
   final VariantContext vc = builder.make();
   final Map<String, Object> attrs =
       calculateChromosomeCounts(
           vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues);
   builder.attributes(attrs);
 }
  public static void addComplexGenotypesTest() {
    final List<Allele> allAlleles =
        Arrays.asList(
            Allele.create("A", true), Allele.create("C", false), Allele.create("G", false));

    for (int nAlleles : Arrays.asList(2, 3)) {
      for (int highestPloidy : Arrays.asList(1, 2, 3)) {
        // site alleles
        final List<Allele> siteAlleles = allAlleles.subList(0, nAlleles);

        // possible alleles for genotypes
        final List<Allele> possibleGenotypeAlleles = new ArrayList<Allele>(siteAlleles);
        possibleGenotypeAlleles.add(Allele.NO_CALL);

        // there are n^ploidy possible genotypes
        final List<List<Allele>> possibleGenotypes =
            makeAllGenotypes(possibleGenotypeAlleles, highestPloidy);
        final int nPossibleGenotypes = possibleGenotypes.size();

        VariantContextBuilder vb = new VariantContextBuilder("unittest", "1", 1, 1, siteAlleles);

        // first test -- create n copies of each genotype
        for (int i = 0; i < nPossibleGenotypes; i++) {
          final List<Genotype> samples = new ArrayList<Genotype>(3);
          samples.add(GenotypeBuilder.create("sample" + i, possibleGenotypes.get(i)));
          add(vb.genotypes(samples));
        }

        // second test -- create one sample with each genotype
        {
          final List<Genotype> samples = new ArrayList<Genotype>(nPossibleGenotypes);
          for (int i = 0; i < nPossibleGenotypes; i++) {
            samples.add(GenotypeBuilder.create("sample" + i, possibleGenotypes.get(i)));
          }
          add(vb.genotypes(samples));
        }

        // test mixed ploidy
        for (int i = 0; i < nPossibleGenotypes; i++) {
          for (int ploidy = 1; ploidy < highestPloidy; ploidy++) {
            final List<Genotype> samples = new ArrayList<Genotype>(highestPloidy);
            final List<Allele> genotype = possibleGenotypes.get(i).subList(0, ploidy);
            samples.add(GenotypeBuilder.create("sample" + i, genotype));
            add(vb.genotypes(samples));
          }
        }
      }
    }
  }
    private RepeatDetectorTest(
        boolean isTrueRepeat, String ref, String refAlleleString, String... altAlleleStrings) {
      super(RepeatDetectorTest.class);
      this.ref = "N" + ref; // add a dummy base for the event here
      this.isTrueRepeat = isTrueRepeat;

      List<Allele> alleles = new LinkedList<Allele>();
      final Allele refAllele = Allele.create(refAlleleString, true);
      alleles.add(refAllele);
      for (final String altString : altAlleleStrings) {
        final Allele alt = Allele.create(altString, false);
        alleles.add(alt);
      }

      VariantContextBuilder builder =
          new VariantContextBuilder("test", "chr1", 1, 1 + refAllele.length(), alleles);
      this.vc = builder.make();
    }
    public String toString() {
      StringBuilder b = new StringBuilder();
      b.append("VariantContextTestData: [");
      final VariantContext vc = vcs.get(0);
      final VariantContextBuilder builder = new VariantContextBuilder(vc);
      builder.noGenotypes();
      b.append(builder.make().toString());
      if (vc.getNSamples() < 5) {
        for (final Genotype g : vc.getGenotypes()) b.append(g.toString());
      } else {
        b.append(" nGenotypes = ").append(vc.getNSamples());
      }

      if (vcs.size() > 1)
        b.append(" ----- with another ").append(vcs.size() - 1).append(" VariantContext records");
      b.append("]");
      return b.toString();
    }
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null || !BaseUtils.isRegularBase(ref.getBase())) return 0;

    Collection<VariantContext> contexts = getVariantContexts(tracker, ref);

    for (VariantContext vc : contexts) {
      VariantContextBuilder builder = new VariantContextBuilder(vc);

      // set the appropriate sample name if necessary
      if (sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName())) {
        Genotype g =
            new GenotypeBuilder(vc.getGenotype(variants.getName())).name(sampleName).make();
        builder.genotypes(g);
      }

      final VariantContext withID = variantOverlapAnnotator.annotateRsID(tracker, builder.make());
      writeRecord(withID, tracker, ref.getLocus());
    }

    return 1;
  }
  private static void addGenotypesAndGTests() {
    //        for ( final int ploidy : Arrays.asList(2)) {
    for (final int ploidy : Arrays.asList(1, 2, 3, 4, 5)) {
      final List<List<String>> alleleCombinations =
          Arrays.asList(
              Arrays.asList("A"),
              Arrays.asList("A", "C"),
              Arrays.asList("A", "C", "G"),
              Arrays.asList("A", "C", "G", "T"));

      for (final List<String> alleles : alleleCombinations) {
        final VariantContextBuilder vcb = builder().alleles(alleles);
        final VariantContext site = vcb.make();
        final int nAlleles = site.getNAlleles();
        final Allele ref = site.getReference();

        // base genotype is ref/.../ref up to ploidy
        final List<Allele> baseGenotype = new ArrayList<Allele>(ploidy);
        for (int i = 0; i < ploidy; i++) baseGenotype.add(ref);
        final int nPLs = GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy);

        // ada is 0, 1, ..., nAlleles - 1
        final List<Integer> ada = new ArrayList<Integer>(nAlleles);
        for (int i = 0; i < nAlleles - 1; i++) ada.add(i);

        // pl is 0, 1, ..., up to nPLs (complex calc of nAlleles and ploidy)
        final int[] pl = new int[nPLs];
        for (int i = 0; i < pl.length; i++) pl[i] = i;

        final GenotypeBuilder gb = new GenotypeBuilder("ADA_PL_SAMPLE");
        gb.alleles(baseGenotype);
        gb.PL(pl);
        gb.attribute("ADA", nAlleles == 2 ? ada.get(0) : ada);
        vcb.genotypes(gb.make());

        add(vcb);
      }
    }
  }
  public static VariantContextBuilder pruneVariantContext(
      final VariantContextBuilder builder, Collection<String> keysToPreserve) {
    final VariantContext vc = builder.make();
    if (keysToPreserve == null) keysToPreserve = Collections.emptyList();

    // VC info
    final Map<String, Object> attributes = subsetAttributes(vc.commonInfo, keysToPreserve);

    // Genotypes
    final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
    for (final Genotype g : vc.getGenotypes()) {
      Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve);
      genotypes.add(
          new Genotype(
              g.getSampleName(),
              g.getAlleles(),
              g.getLog10PError(),
              g.getFilters(),
              genotypeAttributes,
              g.isPhased()));
    }

    return builder.genotypes(genotypes).attributes(attributes);
  }
  /**
   * Takes the interval, finds it in the stash, prints it to the VCF
   *
   * @param stats The statistics of the interval
   * @param refAllele the reference allele
   */
  private void outputStatsToVCF(final IntervalStratification stats, final Allele refAllele) {
    GenomeLoc interval = stats.getInterval();

    final List<Allele> alleles = new ArrayList<>();
    final Map<String, Object> attributes = new HashMap<>();
    final ArrayList<Genotype> genotypes = new ArrayList<>();

    for (String sample : samples) {
      final GenotypeBuilder gb = new GenotypeBuilder(sample);

      SampleStratification sampleStat = stats.getSampleStatistics(sample);
      gb.attribute(
          GATKVCFConstants.AVG_INTERVAL_DP_BY_SAMPLE_KEY,
          sampleStat.averageCoverage(interval.size()));
      gb.attribute(GATKVCFConstants.LOW_COVERAGE_LOCI, sampleStat.getNLowCoveredLoci());
      gb.attribute(GATKVCFConstants.ZERO_COVERAGE_LOCI, sampleStat.getNUncoveredLoci());
      gb.filters(statusToStrings(stats.getSampleStatistics(sample).callableStatuses(), false));

      genotypes.add(gb.make());
    }
    alleles.add(refAllele);
    alleles.add(SYMBOLIC_ALLELE);
    VariantContextBuilder vcb =
        new VariantContextBuilder(
            "DiagnoseTargets",
            interval.getContig(),
            interval.getStart(),
            interval.getStop(),
            alleles);

    vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR);
    vcb.filters(new LinkedHashSet<>(statusToStrings(stats.callableStatuses(), true)));

    attributes.put(VCFConstants.END_KEY, interval.getStop());
    attributes.put(GATKVCFConstants.AVG_INTERVAL_DP_KEY, stats.averageCoverage(interval.size()));
    attributes.put(GATKVCFConstants.INTERVAL_GC_CONTENT_KEY, stats.gcContent());

    vcb = vcb.attributes(attributes);
    vcb = vcb.genotypes(genotypes);

    vcfWriter.add(vcb.make());
  }
  /**
   * Main entry function to calculate genotypes of a given VC with corresponding GL's
   *
   * @param tracker Tracker
   * @param refContext Reference context
   * @param rawContext Raw context
   * @param stratifiedContexts Stratified alignment contexts
   * @param vc Input VC
   * @param model GL calculation model
   * @param inheritAttributesFromInputVC Output VC will contain attributes inherited from input vc
   * @return VC with assigned genotypes
   */
  public VariantCallContext calculateGenotypes(
      final RefMetaDataTracker tracker,
      final ReferenceContext refContext,
      final AlignmentContext rawContext,
      Map<String, AlignmentContext> stratifiedContexts,
      final VariantContext vc,
      final GenotypeLikelihoodsCalculationModel.Model model,
      final boolean inheritAttributesFromInputVC,
      final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap>
          perReadAlleleLikelihoodMap) {

    boolean limitedContext =
        tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;

    // initialize the data for this thread if that hasn't been done yet
    if (afcm.get() == null) {
      afcm.set(AFCalcFactory.createAFCalc(UAC, N, logger));
    }

    // estimate our confidence in a reference call and return
    if (vc.getNSamples() == 0) {
      if (limitedContext) return null;
      return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES
          ? estimateReferenceConfidence(vc, stratifiedContexts, getTheta(model), false, 1.0)
          : generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
    }

    AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));

    // is the most likely frequency conformation AC=0 for all alternate alleles?
    boolean bestGuessIsRef = true;

    // determine which alternate alleles have AF>0
    final List<Allele> myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
    final List<Integer> alleleCountsofMLE = new ArrayList<Integer>(vc.getAlleles().size());
    myAlleles.add(vc.getReference());
    for (int i = 0; i < AFresult.getAllelesUsedInGenotyping().size(); i++) {
      final Allele alternateAllele = AFresult.getAllelesUsedInGenotyping().get(i);
      if (alternateAllele.isReference()) continue;

      // we are non-ref if the probability of being non-ref > the emit confidence.
      // the emit confidence is phred-scaled, say 30 => 10^-3.
      // the posterior AF > 0 is log10: -5 => 10^-5
      // we are non-ref if 10^-5 < 10^-3 => -5 < -3
      final boolean isNonRef =
          AFresult.isPolymorphic(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING / -10.0);

      // if the most likely AC is not 0, then this is a good alternate allele to use
      if (isNonRef) {
        myAlleles.add(alternateAllele);
        alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
        bestGuessIsRef = false;
      }
      // if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
      else if (UAC.GenotypingMode
          == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
        myAlleles.add(alternateAllele);
        alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
      }
    }

    final double PoFGT0 = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0());

    // note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice
    final double phredScaledConfidence =
        Math.abs(
            !bestGuessIsRef
                    || UAC.GenotypingMode
                        == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE
                            .GENOTYPE_GIVEN_ALLELES
                ? -10 * AFresult.getLog10PosteriorOfAFEq0()
                : -10 * AFresult.getLog10PosteriorOfAFGT0());

    // return a null call if we don't pass the confidence cutoff or the most likely allele frequency
    // is zero
    if (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES
        && !passesEmitThreshold(phredScaledConfidence, bestGuessIsRef)) {
      // technically, at this point our confidence in a reference call isn't accurately estimated
      //  because it didn't take into account samples with no data, so let's get a better estimate
      return limitedContext
          ? null
          : estimateReferenceConfidence(vc, stratifiedContexts, getTheta(model), true, PoFGT0);
    }

    // start constructing the resulting VC
    final GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
    final VariantContextBuilder builder =
        new VariantContextBuilder(
            "UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
    builder.log10PError(phredScaledConfidence / -10.0);
    if (!passesCallThreshold(phredScaledConfidence)) builder.filters(filter);

    // create the genotypes
    final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true, ploidy);
    builder.genotypes(genotypes);

    // print out stats if we have a writer
    if (verboseWriter != null && !limitedContext)
      printVerboseData(refContext.getLocus().toString(), vc, PoFGT0, phredScaledConfidence, model);

    // *** note that calculating strand bias involves overwriting data structures, so we do that
    // last
    final HashMap<String, Object> attributes = new HashMap<String, Object>();

    // inherit attributed from input vc if requested
    if (inheritAttributesFromInputVC) attributes.putAll(vc.getAttributes());
    // if the site was downsampled, record that fact
    if (!limitedContext && rawContext.hasPileupBeenDownsampled())
      attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);

    if (UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED)
      attributes.put(NUMBER_OF_DISCOVERED_ALLELES_KEY, vc.getAlternateAlleles().size());

    // add the MLE AC and AF annotations
    if (alleleCountsofMLE.size() > 0) {
      attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE);
      final int AN = builder.make().getCalledChrCount();
      final ArrayList<Double> MLEfrequencies = new ArrayList<Double>(alleleCountsofMLE.size());
      // the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT
      // is ./. but the exact model may arbitrarily choose an AC>1)
      for (int AC : alleleCountsofMLE) MLEfrequencies.add(Math.min(1.0, (double) AC / (double) AN));
      attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
    }

    if (UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef) {
      // final boolean DEBUG_SLOD = false;

      // the overall lod
      // double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
      double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
      // if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);

      List<Allele> allAllelesToUse = builder.make().getAlleles();

      // the forward lod
      VariantContext vcForward =
          calculateLikelihoods(
              tracker,
              refContext,
              stratifiedContexts,
              AlignmentContextUtils.ReadOrientation.FORWARD,
              allAllelesToUse,
              false,
              model,
              perReadAlleleLikelihoodMap);
      AFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model));
      // double[] normalizedLog10Posteriors =
      // MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
      double forwardLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0();
      double forwardLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
      // if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ",
      // forwardLog10PofF=" + forwardLog10PofF);

      // the reverse lod
      VariantContext vcReverse =
          calculateLikelihoods(
              tracker,
              refContext,
              stratifiedContexts,
              AlignmentContextUtils.ReadOrientation.REVERSE,
              allAllelesToUse,
              false,
              model,
              perReadAlleleLikelihoodMap);
      AFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model));
      // normalizedLog10Posteriors =
      // MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
      double reverseLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0();
      double reverseLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
      // if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ",
      // reverseLog10PofF=" + reverseLog10PofF);

      double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
      double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
      // if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" +
      // reverseLod);

      // strand score is max bias between forward and reverse strands
      double strandScore = Math.max(forwardLod, reverseLod);
      // rescale by a factor of 10
      strandScore *= 10.0;
      // logger.debug(String.format("SLOD=%f", strandScore));

      if (!Double.isNaN(strandScore)) attributes.put("SB", strandScore);
    }

    // finish constructing the resulting VC
    builder.attributes(attributes);
    VariantContext vcCall = builder.make();

    // if we are subsetting alleles (either because there were too many or because some were not
    // polymorphic)
    // then we may need to trim the alleles (because the original VariantContext may have had to pad
    // at the end).
    if (myAlleles.size() != vc.getAlleles().size()
        && !limitedContext) // limitedContext callers need to handle allele trimming on their own to
                            // keep their perReadAlleleLikelihoodMap alleles in sync
    vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);

    if (annotationEngine != null
        && !limitedContext) { // limitedContext callers need to handle annotations on their own by
                              // calling their own annotationEngine
      // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
      final ReadBackedPileup pileup = rawContext.getBasePileup();
      stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);

      vcCall =
          annotationEngine.annotateContext(
              tracker, refContext, stratifiedContexts, vcCall, perReadAlleleLikelihoodMap);
    }

    return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PoFGT0));
  }
  private static void makeSyntheticTests() {
    VariantContextBuilder rootBuilder = new VariantContextBuilder();
    rootBuilder.source("test");
    rootBuilder.loc("1", 10, 10);
    rootBuilder.alleles("A", "C");
    rootBuilder.unfiltered();
    ROOT = rootBuilder.make();

    add(builder());
    add(builder().alleles("A"));
    add(builder().alleles("A", "C", "T"));
    add(builder().alleles("A", "AC"));
    add(builder().alleles("A", "ACAGT"));
    add(builder().loc("1", 10, 11).alleles("AC", "A"));
    add(builder().loc("1", 10, 13).alleles("ACGT", "A"));

    // make sure filters work
    add(builder().unfiltered());
    add(builder().passFilters());
    add(builder().filters("FILTER1"));
    add(builder().filters("FILTER1", "FILTER2"));

    add(builder().log10PError(VariantContext.NO_LOG10_PERROR));
    add(builder().log10PError(-1));
    add(builder().log10PError(-1.234e6));

    add(builder().noID());
    add(builder().id("rsID12345"));

    add(builder().attribute("INT1", 1));
    add(builder().attribute("INT1", 100));
    add(builder().attribute("INT1", 1000));
    add(builder().attribute("INT1", 100000));
    add(builder().attribute("INT1", null));
    add(builder().attribute("INT3", Arrays.asList(1, 2, 3)));
    add(builder().attribute("INT3", Arrays.asList(1000, 2000, 3000)));
    add(builder().attribute("INT3", Arrays.asList(100000, 200000, 300000)));
    add(builder().attribute("INT3", null));
    add(builder().attribute("INT20", TWENTY_INTS));

    add(builder().attribute("FLOAT1", 1.0));
    add(builder().attribute("FLOAT1", 100.0));
    add(builder().attribute("FLOAT1", 1000.0));
    add(builder().attribute("FLOAT1", 100000.0));
    add(builder().attribute("FLOAT1", null));
    add(builder().attribute("FLOAT3", Arrays.asList(1.0, 2.0, 3.0)));
    add(builder().attribute("FLOAT3", Arrays.asList(1000.0, 2000.0, 3000.0)));
    add(builder().attribute("FLOAT3", Arrays.asList(100000.0, 200000.0, 300000.0)));
    add(builder().attribute("FLOAT3", null));

    add(builder().attribute("FLAG", true));
    // add(builder().attribute("FLAG", false)); // NOTE -- VCF doesn't allow false flags

    add(builder().attribute("STRING1", "s1"));
    add(builder().attribute("STRING1", null));
    add(builder().attribute("STRING3", Arrays.asList("s1", "s2", "s3")));
    add(builder().attribute("STRING3", null));
    add(
        builder()
            .attribute(
                "STRING20",
                Arrays.asList(
                    "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12",
                    "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20")));

    add(builder().attribute("VAR.INFO.STRING", "s1"));
    add(builder().attribute("VAR.INFO.STRING", Arrays.asList("s1", "s2")));
    add(builder().attribute("VAR.INFO.STRING", Arrays.asList("s1", "s2", "s3")));
    add(builder().attribute("VAR.INFO.STRING", null));

    if (ENABLE_GENOTYPE_TESTS) {
      addGenotypesToTestData();
      addComplexGenotypesTest();
    }

    if (ENABLE_A_AND_G_TESTS) addGenotypesAndGTests();

    if (ENABLE_SYMBOLIC_ALLELE_TESTS) addSymbolicAlleleTests();
  }
 public VariantContextTestData(final VCFHeader header, final VariantContextBuilder builder) {
   this(header, Collections.singletonList(builder.fullyDecoded(true).make()));
 }
Example #13
0
  /**
   * Read in a list of ExactCall objects from reader, keeping only those with starts in startsToKeep
   * or all sites (if this is empty)
   *
   * @param reader a just-opened reader sitting at the start of the file
   * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should
   *     be kept
   * @param parser a genome loc parser to create genome locs
   * @return a list of ExactCall objects in reader
   * @throws IOException
   */
  public static List<ExactCall> readExactLog(
      final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser)
      throws IOException {
    if (reader == null) throw new IllegalArgumentException("reader cannot be null");
    if (startsToKeep == null) throw new IllegalArgumentException("startsToKeep cannot be null");
    if (parser == null) throw new IllegalArgumentException("GenomeLocParser cannot be null");

    List<ExactCall> calls = new LinkedList<ExactCall>();

    // skip the header line
    reader.readLine();

    // skip the first "type" line
    reader.readLine();

    while (true) {
      final VariantContextBuilder builder = new VariantContextBuilder();
      final List<Allele> alleles = new ArrayList<Allele>();
      final List<Genotype> genotypes = new ArrayList<Genotype>();
      final double[] posteriors = new double[2];
      final double[] priors = MathUtils.normalizeFromLog10(new double[] {0.5, 0.5}, true);
      final List<Integer> mle = new ArrayList<Integer>();
      final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
      long runtimeNano = -1;

      GenomeLoc currentLoc = null;
      while (true) {
        final String line = reader.readLine();
        if (line == null) return calls;

        final String[] parts = line.split("\t");
        final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]);
        final String variable = parts[1];
        final String key = parts[2];
        final String value = parts[3];

        if (currentLoc == null) currentLoc = lineLoc;

        if (variable.equals("type")) {
          if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) {
            builder.alleles(alleles);
            final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
            builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
            builder.genotypes(genotypes);
            final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[] {}));
            final AFCalcResult result =
                new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
            calls.add(new ExactCall(builder.make(), runtimeNano, result));
          }
          break;
        } else if (variable.equals("allele")) {
          final boolean isRef = key.equals("0");
          alleles.add(Allele.create(value, isRef));
        } else if (variable.equals("PL")) {
          final GenotypeBuilder gb = new GenotypeBuilder(key);
          gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
          genotypes.add(gb.make());
        } else if (variable.equals("log10PosteriorOfAFEq0")) {
          posteriors[0] = Double.valueOf(value);
        } else if (variable.equals("log10PosteriorOfAFGt0")) {
          posteriors[1] = Double.valueOf(value);
        } else if (variable.equals("MLE")) {
          mle.add(Integer.valueOf(value));
        } else if (variable.equals("pNonRefByAllele")) {
          final Allele a = Allele.create(key);
          log10pNonRefByAllele.put(a, Double.valueOf(value));
        } else if (variable.equals("runtime.nano")) {
          runtimeNano = Long.valueOf(value);
        } else {
          // nothing to do
        }
      }
    }
  }
Example #14
0
  static VariantContext reallyMergeIntoMNP(
      VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
    int startInter = vc1.getEnd() + 1;
    int endInter = vc2.getStart() - 1;
    byte[] intermediateBases = null;
    if (startInter <= endInter) {
      intermediateBases =
          referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases();
      StringUtil.toUpperCase(intermediateBases);
    }
    MergedAllelesData mergeData =
        new MergedAllelesData(
            intermediateBases, vc1, vc2); // ensures that the reference allele is added

    GenotypesContext mergedGenotypes = GenotypesContext.create();
    for (final Genotype gt1 : vc1.getGenotypes()) {
      Genotype gt2 = vc2.getGenotype(gt1.getSampleName());

      List<Allele> site1Alleles = gt1.getAlleles();
      List<Allele> site2Alleles = gt2.getAlleles();

      List<Allele> mergedAllelesForSample = new LinkedList<Allele>();

      /* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order as in the input VC records,
        we preserve phase information (if any) relative to whatever precedes vc1:
      */
      Iterator<Allele> all2It = site2Alleles.iterator();
      for (Allele all1 : site1Alleles) {
        Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()

        Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2);
        mergedAllelesForSample.add(mergedAllele);
      }

      double mergedGQ = Math.max(gt1.getLog10PError(), gt2.getLog10PError());
      Set<String> mergedGtFilters =
          new HashSet<
              String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered

      Map<String, Object> mergedGtAttribs = new HashMap<String, Object>();
      PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2);
      if (phaseQual.PQ != null) mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);

      Genotype mergedGt =
          new Genotype(
              gt1.getSampleName(),
              mergedAllelesForSample,
              mergedGQ,
              mergedGtFilters,
              mergedGtAttribs,
              phaseQual.isPhased);
      mergedGenotypes.add(mergedGt);
    }

    String mergedName = mergeVariantContextNames(vc1.getSource(), vc2.getSource());
    double mergedLog10PError = Math.min(vc1.getLog10PError(), vc2.getLog10PError());
    Set<String> mergedFilters =
        new HashSet<
            String>(); // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered
    Map<String, Object> mergedAttribs = mergeVariantContextAttributes(vc1, vc2);

    // ids
    List<String> mergedIDs = new ArrayList<String>();
    if (vc1.hasID()) mergedIDs.add(vc1.getID());
    if (vc2.hasID()) mergedIDs.add(vc2.getID());
    String mergedID =
        mergedIDs.isEmpty()
            ? VCFConstants.EMPTY_ID_FIELD
            : Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs);

    VariantContextBuilder mergedBuilder =
        new VariantContextBuilder(
                mergedName,
                vc1.getChr(),
                vc1.getStart(),
                vc2.getEnd(),
                mergeData.getAllMergedAlleles())
            .id(mergedID)
            .genotypes(mergedGenotypes)
            .log10PError(mergedLog10PError)
            .filters(mergedFilters)
            .attributes(mergedAttribs);
    VariantContextUtils.calculateChromosomeCounts(mergedBuilder, true);
    return mergedBuilder.make();
  }
Example #15
0
 public VariantContext subContextFromSamples(Set<String> sampleNames) {
   VariantContextBuilder builder = new VariantContextBuilder(this);
   GenotypesContext newGenotypes = genotypes.subsetToSamples(sampleNames);
   return builder.genotypes(newGenotypes).alleles(allelesOfGenotypes(newGenotypes)).make();
 }
Example #16
0
 public VariantContext subContextFromSamples(Set<String> sampleNames, Collection<Allele> alleles) {
   VariantContextBuilder builder = new VariantContextBuilder(this);
   return builder.genotypes(genotypes.subsetToSamples(sampleNames)).alleles(alleles).make();
 }
  public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
    // see if we need to trim common reference base from all alleles
    boolean trimVC;

    // We need to trim common reference base from all alleles in all genotypes if a ref base is
    // common to all alleles
    Allele refAllele = inputVC.getReference();
    if (!inputVC.isVariant()) trimVC = false;
    else if (refAllele.isNull()) trimVC = false;
    else {
      trimVC =
          (AbstractVCFCodec.computeForwardClipping(
                  new ArrayList<Allele>(inputVC.getAlternateAlleles()),
                  inputVC.getReference().getDisplayString())
              > 0);
    }

    // nothing to do if we don't need to trim bases
    if (trimVC) {
      List<Allele> alleles = new ArrayList<Allele>();
      GenotypesContext genotypes = GenotypesContext.create();

      // set the reference base for indels in the attributes
      Map<String, Object> attributes = new TreeMap<String, Object>(inputVC.getAttributes());

      Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();

      for (Allele a : inputVC.getAlleles()) {
        if (a.isSymbolic()) {
          alleles.add(a);
          originalToTrimmedAlleleMap.put(a, a);
        } else {
          // get bases for current allele and create a new one with trimmed bases
          byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
          Allele trimmedAllele = Allele.create(newBases, a.isReference());
          alleles.add(trimmedAllele);
          originalToTrimmedAlleleMap.put(a, trimmedAllele);
        }
      }

      // detect case where we're trimming bases but resulting vc doesn't have any null allele. In
      // that case, we keep original representation
      // example: mixed records such as {TA*,TGA,TG}
      boolean hasNullAlleles = false;

      for (Allele a : originalToTrimmedAlleleMap.values()) {
        if (a.isNull()) hasNullAlleles = true;
        if (a.isReference()) refAllele = a;
      }

      if (!hasNullAlleles) return inputVC;
      // now we can recreate new genotypes with trimmed alleles
      for (final Genotype genotype : inputVC.getGenotypes()) {

        List<Allele> originalAlleles = genotype.getAlleles();
        List<Allele> trimmedAlleles = new ArrayList<Allele>();
        for (Allele a : originalAlleles) {
          if (a.isCalled()) trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
          else trimmedAlleles.add(Allele.NO_CALL);
        }
        genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
      }

      final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
      return builder
          .alleles(alleles)
          .genotypes(genotypes)
          .attributes(attributes)
          .referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0]))
          .make();
    }

    return inputVC;
  }
  /**
   * Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority
   * order, if provided. If uniqifySamples is true, the priority order is ignored and names are
   * created by concatenating the VC name with the sample name
   *
   * @param genomeLocParser loc parser
   * @param unsortedVCs collection of unsorted VCs
   * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
   * @param filteredRecordMergeType merge type for filtered records
   * @param genotypeMergeOptions merge option for genotypes
   * @param annotateOrigin should we annotate the set it came from?
   * @param printMessages should we print messages?
   * @param setKey the key name of the set
   * @param filteredAreUncalled are filtered records uncalled?
   * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
   * @return new VariantContext representing the merge of unsortedVCs
   */
  public static VariantContext simpleMerge(
      final GenomeLocParser genomeLocParser,
      final Collection<VariantContext> unsortedVCs,
      final List<String> priorityListOfVCs,
      final FilteredRecordMergeType filteredRecordMergeType,
      final GenotypeMergeType genotypeMergeOptions,
      final boolean annotateOrigin,
      final boolean printMessages,
      final String setKey,
      final boolean filteredAreUncalled,
      final boolean mergeInfoWithMaxAC) {
    if (unsortedVCs == null || unsortedVCs.size() == 0) return null;

    if (annotateOrigin && priorityListOfVCs == null)
      throw new IllegalArgumentException(
          "Cannot merge calls and annotate their origins without a complete priority list of VariantContexts");

    if (genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE)
      verifyUniqueSampleNames(unsortedVCs);

    List<VariantContext> prepaddedVCs =
        sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
    // Make sure all variant contexts are padded with reference base in case of indels if necessary
    List<VariantContext> VCs = new ArrayList<VariantContext>();

    for (VariantContext vc : prepaddedVCs) {
      // also a reasonable place to remove filtered calls, if needed
      if (!filteredAreUncalled || vc.isNotFiltered())
        VCs.add(createVariantContextWithPaddedAlleles(vc, false));
    }
    if (VCs.size() == 0) // everything is filtered out and we're filteredAreUncalled
    return null;

    // establish the baseline info from the first VC
    final VariantContext first = VCs.get(0);
    final String name = first.getSource();
    final Allele refAllele = determineReferenceAllele(VCs);

    final Set<Allele> alleles = new LinkedHashSet<Allele>();
    final Set<String> filters = new TreeSet<String>();
    final Map<String, Object> attributes = new TreeMap<String, Object>();
    final Set<String> inconsistentAttributes = new HashSet<String>();
    final Set<String> variantSources =
        new HashSet<
            String>(); // contains the set of sources we found in our set of VCs that are variant
    final Set<String> rsIDs = new LinkedHashSet<String>(1); // most of the time there's one id

    GenomeLoc loc = getLocation(genomeLocParser, first);
    int depth = 0;
    int maxAC = -1;
    final Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>();
    double log10PError = 1;
    VariantContext vcWithMaxAC = null;
    GenotypesContext genotypes = GenotypesContext.create();

    // counting the number of filtered and variant VCs
    int nFiltered = 0;

    boolean remapped = false;

    // cycle through and add info from the other VCs, making sure the loc/reference matches

    for (VariantContext vc : VCs) {
      if (loc.getStart() != vc.getStart()) // || !first.getReference().equals(vc.getReference()) )
      throw new ReviewedStingException(
            "BUG: attempting to merge VariantContexts with different start sites: first="
                + first.toString()
                + " second="
                + vc.toString());

      if (getLocation(genomeLocParser, vc).size() > loc.size())
        loc = getLocation(genomeLocParser, vc); // get the longest location

      nFiltered += vc.isFiltered() ? 1 : 0;
      if (vc.isVariant()) variantSources.add(vc.getSource());

      AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles);
      remapped = remapped || alleleMapping.needsRemapping();

      alleles.addAll(alleleMapping.values());

      mergeGenotypes(
          genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY);

      log10PError = Math.min(log10PError, vc.isVariant() ? vc.getLog10PError() : 1);

      filters.addAll(vc.getFilters());

      //
      // add attributes
      //
      // special case DP (add it up) and ID (just preserve it)
      //
      if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
        depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);
      if (vc.hasID()) rsIDs.add(vc.getID());
      if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
        String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null);
        // lets see if the string contains a , separator
        if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) {
          List<String> alleleCountArray =
              Arrays.asList(
                  rawAlleleCounts
                      .substring(1, rawAlleleCounts.length() - 1)
                      .split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
          for (String alleleCount : alleleCountArray) {
            final int ac = Integer.valueOf(alleleCount.trim());
            if (ac > maxAC) {
              maxAC = ac;
              vcWithMaxAC = vc;
            }
          }
        } else {
          final int ac = Integer.valueOf(rawAlleleCounts);
          if (ac > maxAC) {
            maxAC = ac;
            vcWithMaxAC = vc;
          }
        }
      }

      for (Map.Entry<String, Object> p : vc.getAttributes().entrySet()) {
        String key = p.getKey();
        // if we don't like the key already, don't go anywhere
        if (!inconsistentAttributes.contains(key)) {
          boolean alreadyFound = attributes.containsKey(key);
          Object boundValue = attributes.get(key);
          boolean boundIsMissingValue =
              alreadyFound && boundValue.equals(VCFConstants.MISSING_VALUE_v4);

          if (alreadyFound && !boundValue.equals(p.getValue()) && !boundIsMissingValue) {
            // we found the value but we're inconsistent, put it in the exclude list
            // System.out.printf("Inconsistent INFO values: %s => %s and %s%n", key, boundValue,
            // p.getValue());
            inconsistentAttributes.add(key);
            attributes.remove(key);
          } else if (!alreadyFound || boundIsMissingValue) { // no value
            // if ( vc != first ) System.out.printf("Adding key %s => %s%n", p.getKey(),
            // p.getValue());
            attributes.put(key, p.getValue());
          }
        }
      }
    }

    // if we have more alternate alleles in the merged VC than in one or more of the
    // original VCs, we need to strip out the GL/PLs (because they are no longer accurate), as well
    // as allele-dependent attributes like AC,AF
    for (VariantContext vc : VCs) {
      if (vc.alleles.size() == 1) continue;
      if (hasPLIncompatibleAlleles(alleles, vc.alleles)) {
        if (!genotypes.isEmpty())
          logger.warn(
              String.format(
                  "Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s",
                  genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles));
        genotypes = stripPLs(genotypes);
        // this will remove stale AC,AF attributed from vc
        calculateChromosomeCounts(vc, attributes, true);
        break;
      }
    }

    // take the VC with the maxAC and pull the attributes into a modifiable map
    if (mergeInfoWithMaxAC && vcWithMaxAC != null) {
      attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes());
    }

    // if at least one record was unfiltered and we want a union, clear all of the filters
    if ((filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED
            && nFiltered != VCs.size())
        || filteredRecordMergeType == FilteredRecordMergeType.KEEP_UNCONDITIONAL) filters.clear();

    if (annotateOrigin) { // we care about where the call came from
      String setValue;
      if (nFiltered == 0
          && variantSources.size() == priorityListOfVCs.size()) // nothing was unfiltered
      setValue = MERGE_INTERSECTION;
      else if (nFiltered == VCs.size()) // everything was filtered out
      setValue = MERGE_FILTER_IN_ALL;
      else if (variantSources.isEmpty()) // everyone was reference
      setValue = MERGE_REF_IN_ALL;
      else {
        LinkedHashSet<String> s = new LinkedHashSet<String>();
        for (VariantContext vc : VCs)
          if (vc.isVariant())
            s.add(vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource());
        setValue = Utils.join("-", s);
      }

      if (setKey != null) {
        attributes.put(setKey, setValue);
        if (mergeInfoWithMaxAC && vcWithMaxAC != null) {
          attributesWithMaxAC.put(setKey, vcWithMaxAC.getSource());
        }
      }
    }

    if (depth > 0) attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));

    final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(",", rsIDs);

    final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID);
    builder.loc(loc.getContig(), loc.getStart(), loc.getStop());
    builder.alleles(alleles);
    builder.genotypes(genotypes);
    builder.log10PError(log10PError);
    builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes);

    // Trim the padded bases of all alleles if necessary
    VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make());
    if (printMessages && remapped) System.out.printf("Remapped => %s%n", merged);
    return merged;
  }