Esempio n. 1
0
  @Test
  public void testFixReverseComplementedGenotypes() {

    final Allele refA = Allele.create("A", true);
    final Allele altC = Allele.create("C", false);
    final GenotypesContext originalGenotypes = GenotypesContext.create(3);
    originalGenotypes.add(new GenotypeBuilder("homref").alleles(Arrays.asList(refA, refA)).make());
    originalGenotypes.add(new GenotypeBuilder("het").alleles(Arrays.asList(refA, altC)).make());
    originalGenotypes.add(new GenotypeBuilder("homvar").alleles(Arrays.asList(altC, altC)).make());

    final Allele refT = Allele.create("T", true);
    final Allele altG = Allele.create("G", false);
    final GenotypesContext expectedGenotypes = GenotypesContext.create(3);
    expectedGenotypes.add(new GenotypeBuilder("homref").alleles(Arrays.asList(refT, refT)).make());
    expectedGenotypes.add(new GenotypeBuilder("het").alleles(Arrays.asList(refT, altG)).make());
    expectedGenotypes.add(new GenotypeBuilder("homvar").alleles(Arrays.asList(altG, altG)).make());

    final Map<Allele, Allele> reverseComplementAlleleMap = new HashMap<Allele, Allele>(2);
    reverseComplementAlleleMap.put(refA, refT);
    reverseComplementAlleleMap.put(altC, altG);
    final GenotypesContext actualGenotypes =
        LiftoverVcf.fixGenotypes(originalGenotypes, reverseComplementAlleleMap);

    for (final String sample : Arrays.asList("homref", "het", "homvar")) {
      final List<Allele> expected = expectedGenotypes.get(sample).getAlleles();
      final List<Allele> actual = actualGenotypes.get(sample).getAlleles();
      Assert.assertEquals(expected.get(0), actual.get(0));
      Assert.assertEquals(expected.get(1), actual.get(1));
    }
  }
Esempio n. 2
0
  private Collection<VariantContext> getVariantContexts(
      RefMetaDataTracker tracker, ReferenceContext ref) {

    List<Feature> features = tracker.getValues(variants, ref.getLocus());
    List<VariantContext> VCs = new ArrayList<VariantContext>(features.size());

    for (Feature record : features) {
      if (VariantContextAdaptors.canBeConvertedToVariantContext(record)) {
        // we need to special case the HapMap format because indels aren't handled correctly
        if (record instanceof RawHapMapFeature) {

          // is it an indel?
          RawHapMapFeature hapmap = (RawHapMapFeature) record;
          if (hapmap.getAlleles()[0].equals(RawHapMapFeature.NULL_ALLELE_STRING)
              || hapmap.getAlleles()[1].equals(RawHapMapFeature.NULL_ALLELE_STRING)) {
            // get the dbsnp object corresponding to this record (needed to help us distinguish
            // between insertions and deletions)
            VariantContext dbsnpVC = getDbsnp(hapmap.getName());
            if (dbsnpVC == null || dbsnpVC.isMixed()) continue;

            Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
            alleleMap.put(
                RawHapMapFeature.DELETION,
                Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion()));
            alleleMap.put(
                RawHapMapFeature.INSERTION,
                Allele.create(
                    (char) ref.getBase() + ((RawHapMapFeature) record).getAlleles()[1],
                    !dbsnpVC.isSimpleInsertion()));
            hapmap.setActualAlleles(alleleMap);

            // also, use the correct positioning for insertions
            hapmap.updatePosition(dbsnpVC.getStart());

            if (hapmap.getStart() < ref.getWindow().getStart()) {
              logger.warn(
                  "Hapmap record at "
                      + ref.getLocus()
                      + " represents an indel too large to be converted; skipping...");
              continue;
            }
          }
        }

        // ok, we might actually be able to turn this record in a variant context
        VariantContext vc =
            VariantContextAdaptors.toVariantContext(variants.getName(), record, ref);

        if (vc != null) // sometimes the track has odd stuff in it that can't be converted
        VCs.add(vc);
      }
    }

    return VCs;
  }
 private ReverseClippingPositionTestProvider(
     final int expectedClip, final String ref, final String... alleles) {
   super(ReverseClippingPositionTestProvider.class);
   this.ref = ref;
   for (final String allele : alleles) this.alleles.add(Allele.create(allele));
   this.expectedClip = expectedClip;
 }
  /**
   * Returns a context identical to this with the REF and ALT alleles reverse complemented.
   *
   * @param vc variant context
   * @return new vc
   */
  public static VariantContext reverseComplement(VariantContext vc) {
    // create a mapping from original allele to reverse complemented allele
    HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
    for (Allele originalAllele : vc.getAlleles()) {
      Allele newAllele;
      if (originalAllele.isNoCall() || originalAllele.isNull()) newAllele = originalAllele;
      else
        newAllele =
            Allele.create(
                BaseUtils.simpleReverseComplement(originalAllele.getBases()),
                originalAllele.isReference());
      alleleMap.put(originalAllele, newAllele);
    }

    // create new Genotype objects
    GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
    for (final Genotype genotype : vc.getGenotypes()) {
      List<Allele> newAlleles = new ArrayList<Allele>();
      for (Allele allele : genotype.getAlleles()) {
        Allele newAllele = alleleMap.get(allele);
        if (newAllele == null) newAllele = Allele.NO_CALL;
        newAlleles.add(newAllele);
      }
      newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
    }

    return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
  }
    private RepeatDetectorTest(
        boolean isTrueRepeat, String ref, String refAlleleString, String... altAlleleStrings) {
      super(RepeatDetectorTest.class);
      this.ref = "N" + ref; // add a dummy base for the event here
      this.isTrueRepeat = isTrueRepeat;

      List<Allele> alleles = new LinkedList<Allele>();
      final Allele refAllele = Allele.create(refAlleleString, true);
      alleles.add(refAllele);
      for (final String altString : altAlleleStrings) {
        final Allele alt = Allele.create(altString, false);
        alleles.add(alt);
      }

      VariantContextBuilder builder =
          new VariantContextBuilder("test", "chr1", 1, 1 + refAllele.length(), alleles);
      this.vc = builder.make();
    }
Esempio n. 6
0
 /**
  * Outputs all intervals that are behind the current reference locus
  *
  * @param refLocus the current reference locus
  * @param refBase the reference allele
  */
 private void outputFinishedIntervals(final GenomeLoc refLocus, final byte refBase) {
   // output any intervals that were finished
   final List<GenomeLoc> toRemove = new LinkedList<>();
   for (GenomeLoc key : intervalMap.keySet()) {
     if (key.isBefore(refLocus)) {
       final IntervalStratification intervalStats = intervalMap.get(key);
       outputStatsToVCF(intervalStats, Allele.create(refBase, true));
       if (hasMissingLoci(intervalStats)) {
         outputMissingInterval(intervalStats);
       }
       toRemove.add(key);
     }
   }
   for (GenomeLoc key : toRemove) {
     intervalMap.remove(key);
   }
 }
  private VariantCallContext generateEmptyContext(
      RefMetaDataTracker tracker,
      ReferenceContext ref,
      Map<String, AlignmentContext> stratifiedContexts,
      AlignmentContext rawContext) {
    VariantContext vc;
    if (UAC.GenotypingMode
        == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
      VariantContext vcInput =
          UnifiedGenotyperEngine.getVCFromAllelesRod(
              tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles);
      if (vcInput == null) return null;
      vc =
          new VariantContextBuilder(
                  "UG_call",
                  ref.getLocus().getContig(),
                  vcInput.getStart(),
                  vcInput.getEnd(),
                  vcInput.getAlleles())
              .make();
    } else {
      // deal with bad/non-standard reference bases
      if (!Allele.acceptableAlleleBases(new byte[] {ref.getBase()})) return null;

      Set<Allele> alleles = new HashSet<Allele>();
      alleles.add(Allele.create(ref.getBase(), true));
      vc =
          new VariantContextBuilder(
                  "UG_call",
                  ref.getLocus().getContig(),
                  ref.getLocus().getStart(),
                  ref.getLocus().getStart(),
                  alleles)
              .make();
    }

    if (annotationEngine != null) {
      // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
      final ReadBackedPileup pileup = rawContext.getBasePileup();
      stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);

      vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc);
    }

    return new VariantCallContext(vc, false);
  }
Esempio n. 8
0
    private Allele ensureMergedAllele(
        Allele all1, Allele all2, boolean creatingReferenceForFirstTime) {
      AlleleOneAndTwo all12 = new AlleleOneAndTwo(all1, all2);
      Allele mergedAllele = mergedAlleles.get(all12);

      if (mergedAllele == null) {
        byte[] bases1 = all1.getBases();
        byte[] bases2 = all2.getBases();

        byte[] mergedBases = new byte[bases1.length + intermediateLength + bases2.length];
        System.arraycopy(bases1, 0, mergedBases, 0, bases1.length);
        if (intermediateBases != null)
          System.arraycopy(intermediateBases, 0, mergedBases, bases1.length, intermediateLength);
        System.arraycopy(bases2, 0, mergedBases, bases1.length + intermediateLength, bases2.length);

        mergedAllele = Allele.create(mergedBases, creatingReferenceForFirstTime);
        mergedAlleles.put(all12, mergedAllele);
      }

      return mergedAllele;
    }
  @BeforeSuite
  public void setup() {
    final File referenceFile = new File(b37KGReference);
    try {
      IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
      genomeLocParser = new GenomeLocParser(seq);
    } catch (FileNotFoundException ex) {
      throw new UserException.CouldNotReadInputFile(referenceFile, ex);
    }

    // alleles
    Aref = Allele.create("A", true);
    Cref = Allele.create("C", true);
    T = Allele.create("T");
    C = Allele.create("C");
    ATC = Allele.create("ATC");
    ATCATC = Allele.create("ATCATC");
  }
Esempio n. 10
0
  /**
   * Read in a list of ExactCall objects from reader, keeping only those with starts in startsToKeep
   * or all sites (if this is empty)
   *
   * @param reader a just-opened reader sitting at the start of the file
   * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should
   *     be kept
   * @param parser a genome loc parser to create genome locs
   * @return a list of ExactCall objects in reader
   * @throws IOException
   */
  public static List<ExactCall> readExactLog(
      final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser)
      throws IOException {
    if (reader == null) throw new IllegalArgumentException("reader cannot be null");
    if (startsToKeep == null) throw new IllegalArgumentException("startsToKeep cannot be null");
    if (parser == null) throw new IllegalArgumentException("GenomeLocParser cannot be null");

    List<ExactCall> calls = new LinkedList<ExactCall>();

    // skip the header line
    reader.readLine();

    // skip the first "type" line
    reader.readLine();

    while (true) {
      final VariantContextBuilder builder = new VariantContextBuilder();
      final List<Allele> alleles = new ArrayList<Allele>();
      final List<Genotype> genotypes = new ArrayList<Genotype>();
      final double[] posteriors = new double[2];
      final double[] priors = MathUtils.normalizeFromLog10(new double[] {0.5, 0.5}, true);
      final List<Integer> mle = new ArrayList<Integer>();
      final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
      long runtimeNano = -1;

      GenomeLoc currentLoc = null;
      while (true) {
        final String line = reader.readLine();
        if (line == null) return calls;

        final String[] parts = line.split("\t");
        final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]);
        final String variable = parts[1];
        final String key = parts[2];
        final String value = parts[3];

        if (currentLoc == null) currentLoc = lineLoc;

        if (variable.equals("type")) {
          if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) {
            builder.alleles(alleles);
            final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
            builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
            builder.genotypes(genotypes);
            final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[] {}));
            final AFCalcResult result =
                new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
            calls.add(new ExactCall(builder.make(), runtimeNano, result));
          }
          break;
        } else if (variable.equals("allele")) {
          final boolean isRef = key.equals("0");
          alleles.add(Allele.create(value, isRef));
        } else if (variable.equals("PL")) {
          final GenotypeBuilder gb = new GenotypeBuilder(key);
          gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
          genotypes.add(gb.make());
        } else if (variable.equals("log10PosteriorOfAFEq0")) {
          posteriors[0] = Double.valueOf(value);
        } else if (variable.equals("log10PosteriorOfAFGt0")) {
          posteriors[1] = Double.valueOf(value);
        } else if (variable.equals("MLE")) {
          mle.add(Integer.valueOf(value));
        } else if (variable.equals("pNonRefByAllele")) {
          final Allele a = Allele.create(key);
          log10pNonRefByAllele.put(a, Double.valueOf(value));
        } else if (variable.equals("runtime.nano")) {
          runtimeNano = Long.valueOf(value);
        } else {
          // nothing to do
        }
      }
    }
  }
Esempio n. 11
0
/**
 * Analyze coverage distribution and validate read mates per interval and per sample
 *
 * <p>This tool is useful for diagnosing regions with bad coverage, mapping, or read mate pairs. It
 * analyzes each sample independently and aggregates results over intervals of interest.
 * Low-coverage regions can be identified by using e.g. FindCoveredIntervals with the -uncovered
 * argument.
 *
 * <h3>Input</h3>
 *
 * <ul>
 *   <li>A reference file
 *   <li>one or more input BAMs
 *   <li>One or more intervals
 * </ul>
 *
 * <h3>Output</h3>
 *
 * <p>A modified VCF detailing each interval by sample and information for each interval according
 * to the thresholds used. Interval information includes GC Content, average interval depth,
 * callable status among others. If you use the --missing option, you can get as a second output a
 * intervals file with the loci that have missing data. This file can then be used as input to
 * QualifyMissingIntervals for full qualification and interpretation of why the data is missing.
 *
 * <h3>Usage example</h3>
 *
 * <pre>
 *    java -jar GenomeAnalysisTK.jar
 *              -T DiagnoseTargets \
 *              -R reference.fasta \
 *              -I sample1.bam \
 *              -I sample2.bam \
 *              -I sample3.bam \
 *              -L intervals.interval_list \
 *              -o output.vcf
 *  </pre>
 *
 * @author Mauricio Carneiro, Roger Zurawicki
 * @since 5/8/12
 */
@DocumentedGATKFeature(
    groupName = HelpConstants.DOCS_CAT_QC,
    extraDocs = {CommandLineGATK.class})
@By(value = DataSource.READS)
@PartitionBy(PartitionType.INTERVAL)
@Downsample(by = DownsampleType.NONE)
public class DiagnoseTargets extends LocusWalker<Long, Long> {

  @Output(doc = "File to which interval statistics should be written")
  private VariantContextWriter vcfWriter = null;

  @ArgumentCollection private ThresHolder thresholds = new ThresHolder();

  private Map<GenomeLoc, IntervalStratification> intervalMap =
      null; // maps each interval => statistics
  private PeekableIterator<GenomeLoc>
      intervalListIterator; // an iterator to go over all the intervals provided as we traverse the
                            // genome
  private Set<String> samples = null; // all the samples being processed
  private static final Allele SYMBOLIC_ALLELE =
      Allele.create("<DT>", false); // avoid creating the symbolic allele multiple times
  private static final Allele UNCOVERED_ALLELE =
      Allele.create(
          "A", true); // avoid creating the 'fake' ref allele for uncovered intervals multiple times
  private static final int INITIAL_HASH_SIZE =
      50; // enough room for potential overlapping intervals plus recently finished intervals

  @Override
  public void initialize() {
    super.initialize();

    if (getToolkit().getIntervals() == null || getToolkit().getIntervals().isEmpty())
      throw new UserException(
          "This tool only works if you provide one or more intervals (use the -L argument). If you want to run whole genome, use -T DepthOfCoverage instead.");

    intervalMap = new LinkedHashMap<>(INITIAL_HASH_SIZE);
    intervalListIterator = new PeekableIterator<>(getToolkit().getIntervals().iterator());

    // get all of the unique sample names for the VCF Header
    samples = ReadUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
    vcfWriter.writeHeader(new VCFHeader(getHeaderInfo(), samples));

    // pre load all the statistics classes because it is costly to operate on the JVM and we only
    // want to do it once.
    loadAllPlugins(thresholds);
  }

  @Override
  public Long map(
      final RefMetaDataTracker tracker,
      final ReferenceContext ref,
      final AlignmentContext context) {
    GenomeLoc refLocus = ref.getLocus();

    // process and remove any intervals in the map that are don't overlap the current locus anymore
    // and add all new intervals that may overlap this reference locus
    addNewOverlappingIntervals(refLocus);
    outputFinishedIntervals(refLocus, ref.getBase());

    // at this point, all intervals in intervalMap overlap with this locus, so update all of them
    for (IntervalStratification intervalStratification : intervalMap.values())
      intervalStratification.addLocus(context, ref);

    return 1L;
  }

  @Override
  public Long reduceInit() {
    return 0L;
  }

  /**
   * Not sure what we are going to do here
   *
   * @param value result of the map.
   * @param sum accumulator for the reduce.
   * @return a long
   */
  @Override
  public Long reduce(Long value, Long sum) {
    return sum + value;
  }

  /**
   * Process all remaining intervals
   *
   * @param result number of loci processed by the walker
   */
  @Override
  public void onTraversalDone(final Long result) {
    for (GenomeLoc interval : intervalMap.keySet())
      outputStatsToVCF(intervalMap.get(interval), UNCOVERED_ALLELE);

    GenomeLoc interval = intervalListIterator.peek();
    while (interval != null) {
      outputStatsToVCF(createIntervalStatistic(interval), UNCOVERED_ALLELE);
      intervalListIterator.next();
      interval = intervalListIterator.peek();
    }

    if (thresholds.missingTargets != null) {
      thresholds.missingTargets.close();
    }
  }

  /**
   * Outputs all intervals that are behind the current reference locus
   *
   * @param refLocus the current reference locus
   * @param refBase the reference allele
   */
  private void outputFinishedIntervals(final GenomeLoc refLocus, final byte refBase) {
    // output any intervals that were finished
    final List<GenomeLoc> toRemove = new LinkedList<>();
    for (GenomeLoc key : intervalMap.keySet()) {
      if (key.isBefore(refLocus)) {
        final IntervalStratification intervalStats = intervalMap.get(key);
        outputStatsToVCF(intervalStats, Allele.create(refBase, true));
        if (hasMissingLoci(intervalStats)) {
          outputMissingInterval(intervalStats);
        }
        toRemove.add(key);
      }
    }
    for (GenomeLoc key : toRemove) {
      intervalMap.remove(key);
    }
  }

  /**
   * Adds all intervals that overlap the current reference locus to the intervalMap
   *
   * @param refLocus the current reference locus
   */
  private void addNewOverlappingIntervals(final GenomeLoc refLocus) {
    GenomeLoc interval = intervalListIterator.peek();
    while (interval != null && !interval.isPast(refLocus)) {
      intervalMap.put(interval, createIntervalStatistic(interval));
      intervalListIterator.next();
      interval = intervalListIterator.peek();
    }
  }

  /**
   * Takes the interval, finds it in the stash, prints it to the VCF
   *
   * @param stats The statistics of the interval
   * @param refAllele the reference allele
   */
  private void outputStatsToVCF(final IntervalStratification stats, final Allele refAllele) {
    GenomeLoc interval = stats.getInterval();

    final List<Allele> alleles = new ArrayList<>();
    final Map<String, Object> attributes = new HashMap<>();
    final ArrayList<Genotype> genotypes = new ArrayList<>();

    for (String sample : samples) {
      final GenotypeBuilder gb = new GenotypeBuilder(sample);

      SampleStratification sampleStat = stats.getSampleStatistics(sample);
      gb.attribute(
          GATKVCFConstants.AVG_INTERVAL_DP_BY_SAMPLE_KEY,
          sampleStat.averageCoverage(interval.size()));
      gb.attribute(GATKVCFConstants.LOW_COVERAGE_LOCI, sampleStat.getNLowCoveredLoci());
      gb.attribute(GATKVCFConstants.ZERO_COVERAGE_LOCI, sampleStat.getNUncoveredLoci());
      gb.filters(statusToStrings(stats.getSampleStatistics(sample).callableStatuses(), false));

      genotypes.add(gb.make());
    }
    alleles.add(refAllele);
    alleles.add(SYMBOLIC_ALLELE);
    VariantContextBuilder vcb =
        new VariantContextBuilder(
            "DiagnoseTargets",
            interval.getContig(),
            interval.getStart(),
            interval.getStop(),
            alleles);

    vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR);
    vcb.filters(new LinkedHashSet<>(statusToStrings(stats.callableStatuses(), true)));

    attributes.put(VCFConstants.END_KEY, interval.getStop());
    attributes.put(GATKVCFConstants.AVG_INTERVAL_DP_KEY, stats.averageCoverage(interval.size()));
    attributes.put(GATKVCFConstants.INTERVAL_GC_CONTENT_KEY, stats.gcContent());

    vcb = vcb.attributes(attributes);
    vcb = vcb.genotypes(genotypes);

    vcfWriter.add(vcb.make());
  }

  private boolean hasMissingStatuses(AbstractStratification stats) {
    return !stats.callableStatuses().isEmpty();
  }

  private boolean hasMissingLoci(final IntervalStratification stats) {
    return thresholds.missingTargets != null && hasMissingStatuses(stats);
  }

  private void outputMissingInterval(final IntervalStratification stats) {
    final GenomeLoc interval = stats.getInterval();
    final boolean missing[] = new boolean[interval.size()];
    Arrays.fill(missing, true);
    for (AbstractStratification sample : stats.getElements()) {
      if (hasMissingStatuses(sample)) {
        int pos = 0;
        for (AbstractStratification locus : sample.getElements()) {
          if (locus.callableStatuses().isEmpty()) {
            missing[pos] = false;
          }
          pos++;
        }
      }
    }
    int start = -1;
    boolean insideMissing = false;
    for (int i = 0; i < missing.length; i++) {
      if (missing[i] && !insideMissing) {
        start = interval.getStart() + i;
        insideMissing = true;
      } else if (!missing[i] && insideMissing) {
        final int stop = interval.getStart() + i - 1;
        outputMissingInterval(interval.getContig(), start, stop);
        insideMissing = false;
      }
    }
    if (insideMissing) {
      outputMissingInterval(interval.getContig(), start, interval.getStop());
    }
  }

  private void outputMissingInterval(final String contig, final int start, final int stop) {
    final PrintStream out = thresholds.missingTargets;
    out.println(String.format("%s:%d-%d", contig, start, stop));
  }

  /**
   * Function that process a set of statuses into strings
   *
   * @param statuses the set of statuses to be converted
   * @return a matching set of strings
   */
  private List<String> statusToStrings(
      Iterable<CallableStatus> statuses, final boolean isInfoField) {
    List<String> output = new LinkedList<>();

    for (CallableStatus status : statuses)
      if (isInfoField || status != CallableStatus.PASS) output.add(status.name());

    return output;
  }

  private IntervalStratification createIntervalStatistic(GenomeLoc interval) {
    return new IntervalStratification(samples, interval, thresholds);
  }

  protected static void loadAllPlugins(final ThresHolder thresholds) {
    for (Class<?> stat : new PluginManager<LocusMetric>(LocusMetric.class).getPlugins()) {
      try {
        final LocusMetric stats = (LocusMetric) stat.newInstance();
        stats.initialize(thresholds);
        thresholds.locusMetricList.add(stats);
      } catch (Exception e) {
        throw new DynamicClassResolutionException(stat, e);
      }
    }

    for (Class<?> stat : new PluginManager<SampleMetric>(SampleMetric.class).getPlugins()) {
      try {
        final SampleMetric stats = (SampleMetric) stat.newInstance();
        stats.initialize(thresholds);
        thresholds.sampleMetricList.add(stats);
      } catch (Exception e) {
        throw new DynamicClassResolutionException(stat, e);
      }
    }

    for (Class<?> stat : new PluginManager<IntervalMetric>(IntervalMetric.class).getPlugins()) {
      try {
        final IntervalMetric stats = (IntervalMetric) stat.newInstance();
        stats.initialize(thresholds);
        thresholds.intervalMetricList.add(stats);
      } catch (Exception e) {
        throw new DynamicClassResolutionException(stat, e);
      }
    }
  }

  /**
   * Gets the header lines for the VCF writer
   *
   * @return A set of VCF header lines
   */
  private static Set<VCFHeaderLine> getHeaderInfo() {
    Set<VCFHeaderLine> headerLines = new HashSet<>();

    // INFO fields for overall data
    headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AVG_INTERVAL_DP_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.INTERVAL_GC_CONTENT_KEY));
    headerLines.add(
        new VCFInfoHeaderLine(
            "Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));

    // FORMAT fields for each genotype
    headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
    headerLines.add(
        GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.AVG_INTERVAL_DP_BY_SAMPLE_KEY));
    headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.LOW_COVERAGE_LOCI));
    headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.ZERO_COVERAGE_LOCI));

    // FILTER fields
    for (CallableStatus stat : CallableStatus.values())
      headerLines.add(new VCFFilterHeaderLine(stat.name(), stat.description));

    return headerLines;
  }
}
  public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
    // see if we need to trim common reference base from all alleles
    boolean trimVC;

    // We need to trim common reference base from all alleles in all genotypes if a ref base is
    // common to all alleles
    Allele refAllele = inputVC.getReference();
    if (!inputVC.isVariant()) trimVC = false;
    else if (refAllele.isNull()) trimVC = false;
    else {
      trimVC =
          (AbstractVCFCodec.computeForwardClipping(
                  new ArrayList<Allele>(inputVC.getAlternateAlleles()),
                  inputVC.getReference().getDisplayString())
              > 0);
    }

    // nothing to do if we don't need to trim bases
    if (trimVC) {
      List<Allele> alleles = new ArrayList<Allele>();
      GenotypesContext genotypes = GenotypesContext.create();

      // set the reference base for indels in the attributes
      Map<String, Object> attributes = new TreeMap<String, Object>(inputVC.getAttributes());

      Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();

      for (Allele a : inputVC.getAlleles()) {
        if (a.isSymbolic()) {
          alleles.add(a);
          originalToTrimmedAlleleMap.put(a, a);
        } else {
          // get bases for current allele and create a new one with trimmed bases
          byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
          Allele trimmedAllele = Allele.create(newBases, a.isReference());
          alleles.add(trimmedAllele);
          originalToTrimmedAlleleMap.put(a, trimmedAllele);
        }
      }

      // detect case where we're trimming bases but resulting vc doesn't have any null allele. In
      // that case, we keep original representation
      // example: mixed records such as {TA*,TGA,TG}
      boolean hasNullAlleles = false;

      for (Allele a : originalToTrimmedAlleleMap.values()) {
        if (a.isNull()) hasNullAlleles = true;
        if (a.isReference()) refAllele = a;
      }

      if (!hasNullAlleles) return inputVC;
      // now we can recreate new genotypes with trimmed alleles
      for (final Genotype genotype : inputVC.getGenotypes()) {

        List<Allele> originalAlleles = genotype.getAlleles();
        List<Allele> trimmedAlleles = new ArrayList<Allele>();
        for (Allele a : originalAlleles) {
          if (a.isCalled()) trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
          else trimmedAlleles.add(Allele.NO_CALL);
        }
        genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
      }

      final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
      return builder
          .alleles(alleles)
          .genotypes(genotypes)
          .attributes(attributes)
          .referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0]))
          .make();
    }

    return inputVC;
  }
  public static VariantContext createVariantContextWithPaddedAlleles(
      VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
    // see if we need to pad common reference base from all alleles
    boolean padVC;

    // We need to pad a VC with a common base if the length of the reference allele is less than the
    // length of the VariantContext.
    // This happens because the position of e.g. an indel is always one before the actual event (as
    // per VCF convention).
    long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1;
    if (inputVC.hasSymbolicAlleles()) padVC = true;
    else if (inputVC.getReference().length() == locLength) padVC = false;
    else if (inputVC.getReference().length() == locLength - 1) padVC = true;
    else
      throw new IllegalArgumentException(
          "Badly formed variant context at location "
              + String.valueOf(inputVC.getStart())
              + " in contig "
              + inputVC.getChr()
              + ". Reference length must be at most one base shorter than location size");

    // nothing to do if we don't need to pad bases
    if (padVC) {
      if (!inputVC.hasReferenceBaseForIndel())
        throw new ReviewedStingException(
            "Badly formed variant context at location "
                + inputVC.getChr()
                + ":"
                + inputVC.getStart()
                + "; no padded reference base is available.");

      Byte refByte = inputVC.getReferenceBaseForIndel();

      List<Allele> alleles = new ArrayList<Allele>();

      for (Allele a : inputVC.getAlleles()) {
        // get bases for current allele and create a new one with trimmed bases
        if (a.isSymbolic()) {
          alleles.add(a);
        } else {
          String newBases;
          if (refBaseShouldBeAppliedToEndOfAlleles)
            newBases = a.getBaseString() + new String(new byte[] {refByte});
          else newBases = new String(new byte[] {refByte}) + a.getBaseString();
          alleles.add(Allele.create(newBases, a.isReference()));
        }
      }

      // now we can recreate new genotypes with trimmed alleles
      GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
      for (final Genotype g : inputVC.getGenotypes()) {
        List<Allele> inAlleles = g.getAlleles();
        List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
        for (Allele a : inAlleles) {
          if (a.isCalled()) {
            if (a.isSymbolic()) {
              newGenotypeAlleles.add(a);
            } else {
              String newBases;
              if (refBaseShouldBeAppliedToEndOfAlleles)
                newBases = a.getBaseString() + new String(new byte[] {refByte});
              else newBases = new String(new byte[] {refByte}) + a.getBaseString();
              newGenotypeAlleles.add(Allele.create(newBases, a.isReference()));
            }
          } else {
            // add no-call allele
            newGenotypeAlleles.add(Allele.NO_CALL);
          }
        }
        genotypes.add(
            new Genotype(
                g.getSampleName(),
                newGenotypeAlleles,
                g.getLog10PError(),
                g.getFilters(),
                g.getAttributes(),
                g.isPhased()));
      }

      return new VariantContextBuilder(inputVC).alleles(alleles).genotypes(genotypes).make();
    } else return inputVC;
  }