/**
 * Scans forward through the dbSNP track for a record whose ID equals the given rsID.
 *
 * <p>The underlying iterator is created on first use and shared by all later calls, so each
 * lookup resumes from wherever the previous one stopped; records already consumed are never
 * revisited.
 *
 * @param rsID the ID to look for
 * @return the first not-yet-consumed record whose ID equals {@code rsID}, or null if the iterator
 *     runs out before one is found
 * @throws UserException.BadInput if no dbSNP rod was provided
 */
private VariantContext getDbsnp(String rsID) {
    if (dbsnpIterator == null) {
      if (dbsnp == null) {
        throw new UserException.BadInput(
            "No dbSNP rod was provided, but one is needed to decipher the correct indel alleles from the HapMap records");
      }

      final RMDTrackBuilder trackBuilder =
          new RMDTrackBuilder(
              getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
              getToolkit().getGenomeLocParser(),
              getToolkit().getArguments().unsafe,
              getToolkit().getArguments().disableAutoIndexCreationAndLockingWhenReadingRods,
              null);
      final File dbsnpFile = new File(dbsnp.dbsnp.getSource());
      // A seekable iterator would make this search much cheaper, but the hapmap location does
      // not match the dbsnp location, so there is no known position to seek to.
      dbsnpIterator = trackBuilder.createInstanceOfTrack(VCFCodec.class, dbsnpFile).getIterator();
    }

    VariantContext match = null;
    while (match == null && dbsnpIterator.hasNext()) {
      final GATKFeature feature = dbsnpIterator.next();
      final VariantContext candidate = (VariantContext) feature.getUnderlyingObject();
      if (candidate.getID().equals(rsID)) {
        match = candidate;
      }
    }

    return match;
  }
 /**
  * Builds a test-data bundle from a header and the records that go with it.
  *
  * <p>If any record carries genotypes, the stored header is rebuilt from the supplied header's
  * sorted metadata plus the union of all sample names found in the records; otherwise the
  * supplied header is stored as-is.
  *
  * @param header the header describing the records
  * @param vcs the records; the list is stored by reference, not copied
  */
 public VariantContextTestData(final VCFHeader header, final List<VariantContext> vcs) {
   final Set<String> sampleNames = new HashSet<String>();
   for (final VariantContext context : vcs) {
     if (!context.hasGenotypes()) {
       continue;
     }
     sampleNames.addAll(context.getSampleNames());
   }

   if (sampleNames.isEmpty()) {
     this.header = header;
   } else {
     this.header = new VCFHeader(header.getMetaDataInSortedOrder(), sampleNames);
   }
   this.vcs = vcs;
 }
 /**
  * Decodes the next source item and fully decodes the resulting record against the header.
  *
  * @return the fully decoded record, or null when the codec produced no record for this item
  * @throws RuntimeException wrapping any IOException raised while decoding
  */
 @Override
 public VariantContext next() {
   try {
     final VariantContext decoded = codec.decode(nextSource());
     if (decoded == null) {
       return null;
     }
     return decoded.fullyDecode(header, false);
   } catch (IOException ioe) {
     throw new RuntimeException(ioe);
   }
 }
  /**
   * Converts every convertible feature of the variants track at the current locus into a
   * VariantContext.
   *
   * <p>HapMap indel records are patched up first: their real alleles and start position are
   * derived from the matching dbSNP record. Such a record is skipped when no dbSNP record is
   * found, when the dbSNP record is mixed, or when the corrected start falls before the reference
   * window.
   *
   * @param tracker source of the features at this locus
   * @param ref reference context supplying the locus, base and window
   * @return the converted records; possibly empty, never null
   */
  private Collection<VariantContext> getVariantContexts(
      RefMetaDataTracker tracker, ReferenceContext ref) {

    final List<Feature> features = tracker.getValues(variants, ref.getLocus());
    final List<VariantContext> converted = new ArrayList<VariantContext>(features.size());

    for (final Feature record : features) {
      if (!VariantContextAdaptors.canBeConvertedToVariantContext(record)) {
        continue;
      }

      // the HapMap format needs special handling because its indels aren't represented correctly
      if (record instanceof RawHapMapFeature) {
        final RawHapMapFeature hapmap = (RawHapMapFeature) record;

        // a null allele in either slot marks the record as an indel
        final boolean isIndel =
            hapmap.getAlleles()[0].equals(RawHapMapFeature.NULL_ALLELE_STRING)
                || hapmap.getAlleles()[1].equals(RawHapMapFeature.NULL_ALLELE_STRING);

        if (isIndel) {
          // the dbSNP record tells us whether this is an insertion or a deletion
          final VariantContext dbsnpVC = getDbsnp(hapmap.getName());
          if (dbsnpVC == null || dbsnpVC.isMixed()) {
            continue;
          }

          final Map<String, Allele> actualAlleles = new HashMap<String, Allele>(2);
          actualAlleles.put(
              RawHapMapFeature.DELETION,
              Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion()));
          actualAlleles.put(
              RawHapMapFeature.INSERTION,
              Allele.create(
                  (char) ref.getBase() + hapmap.getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
          hapmap.setActualAlleles(actualAlleles);

          // take the position from dbSNP so that insertions are placed correctly
          hapmap.updatePosition(dbsnpVC.getStart());

          if (hapmap.getStart() < ref.getWindow().getStart()) {
            logger.warn(
                "Hapmap record at "
                    + ref.getLocus()
                    + " represents an indel too large to be converted; skipping...");
            continue;
          }
        }
      }

      // the record looks convertible; the adaptor may still return null for odd track content
      final VariantContext vc =
          VariantContextAdaptors.toVariantContext(variants.getName(), record, ref);
      if (vc != null) {
        converted.add(vc);
      }
    }

    return converted;
  }
  /**
   * Registers one test-data entry per source VCF file, each holding the file's header and its
   * fully decoded, non-null records. Does nothing unless ENABLE_SOURCE_VCF_TESTS is set.
   *
   * @throws IOException if reading a source file fails
   */
  private static void makeEmpiricalTests() throws IOException {
    if (!ENABLE_SOURCE_VCF_TESTS) {
      return;
    }

    for (final File sourceVcf : testSourceVCFs) {
      final VCFCodec vcfCodec = new VCFCodec();
      final VariantContextContainer container = readAllVCs(sourceVcf, vcfCodec);
      final List<VariantContext> decodedRecords = new ArrayList<VariantContext>();

      for (final VariantContext rawRecord : container.getVCs()) {
        if (rawRecord == null) {
          continue; // the reader can hand back null records; they are not part of the test data
        }
        decodedRecords.add(rawRecord.fullyDecode(container.getHeader(), false));
      }

      TEST_DATAs.add(new VariantContextTestData(container.getHeader(), decodedRecords));
    }
  }
 /**
  * Runs the QualByDepth annotation on the given record and checks that the "QD" value it
  * reports parses to the expected number, within a tolerance of 0.0001.
  *
  * @param vc the record to annotate
  * @param expectedQD the QD value the annotation should produce
  */
 @Test(dataProvider = "UsingAD")
 public void testUsingAD(final VariantContext vc, final double expectedQD) {
   final QualByDepth annotation = new QualByDepth();
   final Map<String, Object> annotations = annotation.annotate(null, null, null, null, vc, null);
   Assert.assertNotNull(annotations, vc.toString());

   final String qdText = (String) annotations.get("QD");
   final Double observedQD = Double.valueOf(qdText);
   Assert.assertEquals(observedQD, expectedQD, 0.0001);
 }
    /**
     * Summarises this test-data bundle using its first record.
     *
     * <p>The first record is printed without genotypes; its genotypes are then listed
     * individually when it has fewer than five samples, otherwise only their count is shown. If
     * the bundle holds more than one record, the number of remaining records is appended. An
     * empty bundle is reported as such instead of failing.
     *
     * @return a one-line description of this bundle
     */
    @Override
    public String toString() {
      final StringBuilder b = new StringBuilder();
      b.append("VariantContextTestData: [");

      if (vcs.isEmpty()) {
        // There is no first record to describe; calling vcs.get(0) here would throw
        // IndexOutOfBoundsException.
        b.append("no VariantContext records");
      } else {
        final VariantContext vc = vcs.get(0);

        // print the site itself without genotypes, then summarise the genotypes separately
        final VariantContextBuilder builder = new VariantContextBuilder(vc);
        builder.noGenotypes();
        b.append(builder.make().toString());

        if (vc.getNSamples() < 5) {
          for (final Genotype g : vc.getGenotypes()) {
            b.append(g.toString());
          }
        } else {
          b.append(" nGenotypes = ").append(vc.getNSamples());
        }

        if (vcs.size() > 1) {
          b.append(" ----- with another ").append(vcs.size() - 1).append(" VariantContext records");
        }
      }

      b.append("]");
      return b.toString();
    }
  /**
   * Converts the variant records at this locus and writes each one out, after optionally
   * renaming its genotype and annotating it with an rsID.
   *
   * @param tracker source of the features at this locus; may be null
   * @param ref reference context for this locus
   * @param context alignment context; not used by this method
   * @return 0 when the tracker is null or the reference base is not a regular base, 1 otherwise
   */
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    final boolean usableSite = tracker != null && BaseUtils.isRegularBase(ref.getBase());
    if (!usableSite) {
      return 0;
    }

    for (final VariantContext original : getVariantContexts(tracker, ref)) {
      final VariantContextBuilder recordBuilder = new VariantContextBuilder(original);

      // when a sample name was requested, rename the genotype that is stored under the track name
      if (sampleName != null) {
        if (original.hasGenotypes() && original.hasGenotype(variants.getName())) {
          final Genotype renamed =
              new GenotypeBuilder(original.getGenotype(variants.getName()))
                  .name(sampleName)
                  .make();
          recordBuilder.genotypes(renamed);
        }
      }

      final VariantContext annotated =
          variantOverlapAnnotator.annotateRsID(tracker, recordBuilder.make());
      writeRecord(annotated, tracker, ref.getLocus());
    }

    return 1;
  }
  /**
   * Registers sites carrying a single all-reference genotype with PL and ADA fields, for every
   * ploidy from 1 to 5 combined with allele lists of one to four alleles.
   */
  private static void addGenotypesAndGTests() {
    for (int ploidy = 1; ploidy <= 5; ploidy++) {
      final List<List<String>> alleleCombinations =
          Arrays.asList(
              Arrays.asList("A"),
              Arrays.asList("A", "C"),
              Arrays.asList("A", "C", "G"),
              Arrays.asList("A", "C", "G", "T"));

      for (final List<String> alleleStrings : alleleCombinations) {
        final VariantContextBuilder vcb = builder().alleles(alleleStrings);
        final VariantContext site = vcb.make();
        final int nAlleles = site.getNAlleles();
        final Allele refAllele = site.getReference();

        // the genotype is the reference allele repeated once per chromosome copy
        final List<Allele> allRefGenotype = new ArrayList<Allele>(ploidy);
        for (int copy = 0; copy < ploidy; copy++) {
          allRefGenotype.add(refAllele);
        }
        final int nPLs = GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy);

        // ADA holds 0, 1, ..., nAlleles - 2: one entry fewer than the number of alleles
        final List<Integer> ada = new ArrayList<Integer>(nAlleles);
        for (int index = 0; index + 1 < nAlleles; index++) {
          ada.add(index);
        }

        // PL holds 0, 1, ..., nPLs - 1
        final int[] pl = new int[nPLs];
        for (int index = 0; index < pl.length; index++) {
          pl[index] = index;
        }

        // with exactly two alleles ADA is stored as a single value rather than as a list
        final Object adaValue;
        if (nAlleles == 2) {
          adaValue = ada.get(0);
        } else {
          adaValue = ada;
        }

        final GenotypeBuilder gb = new GenotypeBuilder("ADA_PL_SAMPLE");
        gb.alleles(allRefGenotype);
        gb.PL(pl);
        gb.attribute("ADA", adaValue);
        vcb.genotypes(gb.make());

        add(vcb);
      }
    }
  }
Example #10
0
  /**
   * Writes one record to the VCF writer, emitting the header first if it has not been written
   * yet.
   *
   * <p>On the first call this also collects the header lines, records which genotype FORMAT keys
   * are allowed, and determines the sample list. Every record then has its unallowed genotype
   * attributes purged before being written.
   *
   * @param vc the record to write
   * @param tracker source of the raw features, consulted for sample names when the header
   *     supplies none
   * @param loc the locus of the record
   * @throws IllegalStateException if sample names must be taken from the track but it has no
   *     features at {@code loc}
   */
  private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) {
    if (!wroteHeader) {
      wroteHeader = true;

      // gather the header lines
      final Set<VCFHeaderLine> headerLines =
          new HashSet<VCFHeaderLine>(
              GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName())));
      headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));

      // GT plus every FORMAT line found in the header is an allowed genotype key
      allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
      for (final VCFHeaderLine line : headerLines) {
        if (line instanceof VCFFormatHeaderLine) {
          allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine) line).getID());
        }
      }

      samples = new LinkedHashSet<String>();
      if (sampleName == null) {
        // prefer the samples declared in a VCF header
        samples =
            SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName()));

        if (samples.isEmpty()) {
          // otherwise take them from the first raw feature or from the record itself
          final List<Feature> features = tracker.getValues(variants, loc);
          if (features.isEmpty()) {
            throw new IllegalStateException(
                "No rod data is present, but we just created a VariantContext");
          }

          final Feature first = features.get(0);
          if (first instanceof RawHapMapFeature) {
            samples.addAll(Arrays.asList(((RawHapMapFeature) first).getSampleIDs()));
          } else {
            samples.addAll(vc.getSampleNames());
          }
        }
      } else {
        samples.add(sampleName);
      }

      vcfwriter.writeHeader(new VCFHeader(headerLines, samples));
    }

    final VariantContext purged =
        GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
    vcfwriter.add(purged);
  }
  /**
   * Writes the test records under a header that declares two extra samples absent from the data,
   * reads the file back, and checks that known samples round-trip unchanged while the extra
   * samples come back as no-calls.
   *
   * <p>The check is skipped when the header has two or fewer genotype samples, or when any record
   * is symbolic.
   *
   * @param tester supplies the file extension, the writer and the reader for the format under test
   * @param data the header and records to round-trip
   * @throws IOException if the temporary file cannot be created, written or read
   */
  public static void testReaderWriterWithMissingGenotypes(
      final VariantContextIOTest tester, final VariantContextTestData data) throws IOException {
    if (data.header.getNGenotypeSamples() <= 2) {
      return;
    }

    // symbolic alleles are not handled because they may be weird non-call VCFs
    for (final VariantContext candidate : data.vcs) {
      if (candidate.isSymbolic()) {
        return;
      }
    }

    final File tmpFile = File.createTempFile("testReaderWriter", tester.getExtension());
    tmpFile.deleteOnExit();

    // write the expected records to disk under a header that also names the missing samples
    final VariantContextWriter writer =
        tester.makeWriter(tmpFile, EnumSet.of(Options.INDEX_ON_THE_FLY));

    final Set<String> samplesInVCF = new HashSet<String>(data.header.getGenotypeSamples());
    final List<String> missingSamples = Arrays.asList("MISSING1", "MISSING2");
    final List<String> allSamples = new ArrayList<String>(missingSamples);
    allSamples.addAll(samplesInVCF);

    final VCFHeader header = new VCFHeader(data.header.getMetaDataInInputOrder(), allSamples);
    writeVCsToFile(writer, header, data.vcs);

    // read everything back and compare genotype by genotype
    final VariantContextContainer readBack = tester.readAllVCs(tmpFile);

    int expectedIndex = 0;
    for (final VariantContext readVC : readBack.getVCs()) {
      if (readVC != null) { // sometimes we read null records...
        final VariantContext expected = data.vcs.get(expectedIndex++);
        for (final Genotype g : readVC.getGenotypes()) {
          final String sample = g.getSampleName();
          Assert.assertTrue(allSamples.contains(sample));
          if (!samplesInVCF.contains(sample)) {
            // a sample that was never in the data must come back as a no-call
            Assert.assertTrue(g.isNoCall());
          } else {
            assertEquals(g, expected.getGenotype(sample));
          }
        }
      }
    }
  }
  /**
   * Asserts that two variant contexts are equal field by field: contig, start, end, ID, alleles,
   * attributes, filter state, quality and, when the expected record has genotypes, the sample
   * names and every per-sample genotype.
   *
   * @param actual the record under test; must not be null
   * @param expected the record to compare against
   */
  public static void assertEquals(final VariantContext actual, final VariantContext expected) {
    Assert.assertNotNull(actual, "VariantContext expected not null");

    // site-level fields
    Assert.assertEquals(actual.getChr(), expected.getChr(), "chr");
    Assert.assertEquals(actual.getStart(), expected.getStart(), "start");
    Assert.assertEquals(actual.getEnd(), expected.getEnd(), "end");
    Assert.assertEquals(actual.getID(), expected.getID(), "id");
    Assert.assertEquals(
        actual.getAlleles(), expected.getAlleles(), "alleles for " + expected + " vs " + actual);

    // attributes, filters and quality
    assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
    Assert.assertEquals(
        actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied");
    Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "isFiltered");
    VariantBaseTest.assertEqualsSet(actual.getFilters(), expected.getFilters(), "filters");
    VariantBaseTest.assertEqualsDoubleSmart(
        actual.getPhredScaledQual(), expected.getPhredScaledQual());

    // genotypes
    Assert.assertEquals(actual.hasGenotypes(), expected.hasGenotypes(), "hasGenotypes");
    if (!expected.hasGenotypes()) {
      return;
    }

    VariantBaseTest.assertEqualsSet(
        actual.getSampleNames(), expected.getSampleNames(), "sample names set");
    Assert.assertEquals(
        actual.getSampleNamesOrderedByName(),
        expected.getSampleNamesOrderedByName(),
        "sample names");
    for (final String sampleName : expected.getSampleNames()) {
      assertEquals(actual.getGenotype(sampleName), expected.getGenotype(sampleName));
    }
  }
  /**
   * Registers genotype test cases for the given site by calling addGenotypeTests with many
   * different genotype combinations: basic hom-ref/het/hom-var calls, a genotype without GT,
   * mixed-ploidy and phased genotypes (only when the site has an alternate allele), PL values,
   * integer/float/string attributes, and genotype filters. Several groups are gated by the
   * ENABLE_* flags.
   *
   * @param site the site the generated genotypes are attached to; its reference allele and first
   *     alternate allele (if any) are used to build the genotypes
   */
  private static void addGenotypes(final VariantContext site) {
    // test ref/ref
    final Allele ref = site.getReference();
    // alt1 stays null for a site with only one allele; the alt-dependent tests are then skipped
    final Allele alt1 = site.getNAlleles() > 1 ? site.getAlternateAllele(0) : null;
    final Genotype homRef = GenotypeBuilder.create("homRef", Arrays.asList(ref, ref));
    addGenotypeTests(site, homRef);

    if (alt1 != null) {
      final Genotype het = GenotypeBuilder.create("het", Arrays.asList(ref, alt1));
      final Genotype homVar = GenotypeBuilder.create("homVar", Arrays.asList(alt1, alt1));
      addGenotypeTests(site, homRef, het);
      addGenotypeTests(site, homRef, het, homVar);

      // test no GT at all
      addGenotypeTests(
          site, new GenotypeBuilder("noGT", new ArrayList<Allele>(0)).attribute("INT1", 10).make());

      final List<Allele> noCall = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);

      // ploidy: samples with different numbers of alleles at the same site
      // NOTE(review): the genotypes named "tet" below list three alleles, not four
      if (ENABLE_PLOIDY_TESTS) {
        addGenotypeTests(
            site,
            GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
            GenotypeBuilder.create("hap", Arrays.asList(ref)));

        addGenotypeTests(
            site,
            GenotypeBuilder.create("noCall", noCall),
            GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
            GenotypeBuilder.create("hap", Arrays.asList(ref)));

        addGenotypeTests(
            site,
            GenotypeBuilder.create("noCall", noCall),
            GenotypeBuilder.create("noCall2", noCall),
            GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
            GenotypeBuilder.create("hap", Arrays.asList(ref)));

        addGenotypeTests(
            site,
            GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
            GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));

        addGenotypeTests(
            site,
            GenotypeBuilder.create("noCall", noCall),
            GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
            GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));

        addGenotypeTests(
            site,
            GenotypeBuilder.create("noCall", noCall),
            GenotypeBuilder.create("noCall2", noCall),
            GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
            GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));

        // NOTE(review): same combination as the noCall/dip/tet case above, except that the first
        // sample is named "nocall" (lower case) here
        addGenotypeTests(
            site,
            GenotypeBuilder.create("nocall", noCall),
            GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
            GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));
      }

      //
      //
      // TESTING PHASE
      //
      //
      final Genotype gUnphased = new GenotypeBuilder("gUnphased", Arrays.asList(ref, alt1)).make();
      final Genotype gPhased =
          new GenotypeBuilder("gPhased", Arrays.asList(ref, alt1)).phased(true).make();
      final Genotype gPhased2 =
          new GenotypeBuilder("gPhased2", Arrays.asList(alt1, alt1)).phased(true).make();
      final Genotype gPhased3 =
          new GenotypeBuilder("gPhased3", Arrays.asList(ref, ref)).phased(true).make();
      final Genotype haploidNoPhase =
          new GenotypeBuilder("haploidNoPhase", Arrays.asList(ref)).make();
      // pairs and larger groups mixing unphased, phased and haploid genotypes
      addGenotypeTests(site, gUnphased, gPhased);
      addGenotypeTests(site, gUnphased, gPhased2);
      addGenotypeTests(site, gUnphased, gPhased3);
      addGenotypeTests(site, gPhased, gPhased2);
      addGenotypeTests(site, gPhased, gPhased3);
      addGenotypeTests(site, gPhased2, gPhased3);
      addGenotypeTests(site, haploidNoPhase, gPhased);
      addGenotypeTests(site, haploidNoPhase, gPhased2);
      addGenotypeTests(site, haploidNoPhase, gPhased3);
      addGenotypeTests(site, haploidNoPhase, gPhased, gPhased2);
      addGenotypeTests(site, haploidNoPhase, gPhased, gPhased3);
      addGenotypeTests(site, haploidNoPhase, gPhased2, gPhased3);
      addGenotypeTests(site, haploidNoPhase, gPhased, gPhased2, gPhased3);

      // four-allele genotypes, one unphased and one phased
      final Genotype gUnphasedTet =
          new GenotypeBuilder("gUnphasedTet", Arrays.asList(ref, alt1, ref, alt1)).make();
      final Genotype gPhasedTet =
          new GenotypeBuilder("gPhasedTet", Arrays.asList(ref, alt1, alt1, alt1))
              .phased(true)
              .make();
      addGenotypeTests(site, gUnphasedTet, gPhasedTet);
    }

    if (ENABLE_PL_TESTS) {
      // the length of the likelihood array depends on the allele count: 3 values for two alleles,
      // 6 values for three alleles
      if (site.getNAlleles() == 2) {
        // testing PLs
        addGenotypeTests(
            site,
            GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {0, -1, -2}),
            GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[] {0, -2, -3}));

        addGenotypeTests(
            site,
            GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {-1, 0, -2}),
            GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[] {0, -2, -3}));

        addGenotypeTests(
            site,
            GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {-1, 0, -2}),
            GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[] {0, -2000, -1000}));

        addGenotypeTests(
            site, // missing PLs
            GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {-1, 0, -2}),
            GenotypeBuilder.create("g2", Arrays.asList(ref, ref)));
      } else if (site.getNAlleles() == 3) {
        // testing PLs
        addGenotypeTests(
            site,
            GenotypeBuilder.create(
                "g1", Arrays.asList(ref, ref), new double[] {0, -1, -2, -3, -4, -5}),
            GenotypeBuilder.create(
                "g2", Arrays.asList(ref, ref), new double[] {0, -2, -3, -4, -5, -6}));
      }
    }

    // test attributes: each key is exercised with values on both samples and with the second
    // sample carrying the key but no values
    addGenotypeTests(site, attr("g1", ref, "INT1", 1), attr("g2", ref, "INT1", 2));
    addGenotypeTests(site, attr("g1", ref, "INT1", 1), attr("g2", ref, "INT1"));
    addGenotypeTests(site, attr("g1", ref, "INT3", 1, 2, 3), attr("g2", ref, "INT3", 4, 5, 6));
    addGenotypeTests(site, attr("g1", ref, "INT3", 1, 2, 3), attr("g2", ref, "INT3"));

    addGenotypeTests(
        site, attr("g1", ref, "INT20", TWENTY_INTS), attr("g2", ref, "INT20", TWENTY_INTS));

    if (ENABLE_VARARRAY_TESTS) {
      // integer arrays whose length differs from sample to sample
      addGenotypeTests(
          site,
          attr("g1", ref, "INT.VAR", 1, 2, 3),
          attr("g2", ref, "INT.VAR", 4, 5),
          attr("g3", ref, "INT.VAR", 6));
      addGenotypeTests(
          site,
          attr("g1", ref, "INT.VAR", 1, 2, 3),
          attr("g2", ref, "INT.VAR"),
          attr("g3", ref, "INT.VAR", 5));
    }

    addGenotypeTests(site, attr("g1", ref, "FLOAT1", 1.0), attr("g2", ref, "FLOAT1", 2.0));
    addGenotypeTests(site, attr("g1", ref, "FLOAT1", 1.0), attr("g2", ref, "FLOAT1"));
    addGenotypeTests(
        site, attr("g1", ref, "FLOAT3", 1.0, 2.0, 3.0), attr("g2", ref, "FLOAT3", 4.0, 5.0, 6.0));
    addGenotypeTests(site, attr("g1", ref, "FLOAT3", 1.0, 2.0, 3.0), attr("g2", ref, "FLOAT3"));

    if (ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS) {
      //
      //
      // TESTING MULTIPLE SIZED LISTS IN THE GENOTYPE FIELD
      //
      //
      addGenotypeTests(
          site,
          attr("g1", ref, "GS", Arrays.asList("S1", "S2")),
          attr("g2", ref, "GS", Arrays.asList("S3", "S4")));

      addGenotypeTests(
          site, // g1 is missing the string, and g2 is missing FLOAT1
          attr("g1", ref, "FLOAT1", 1.0),
          attr("g2", ref, "GS", Arrays.asList("S3", "S4")));

      // variable sized lists
      addGenotypeTests(
          site, attr("g1", ref, "GV", "S1"), attr("g2", ref, "GV", Arrays.asList("S3", "S4")));

      addGenotypeTests(
          site,
          attr("g1", ref, "GV", Arrays.asList("S1", "S2")),
          attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));

      addGenotypeTests(
          site, // missing value in varlist of string
          attr("g1", ref, "FLOAT1", 1.0),
          attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));
    }

    //
    //
    // TESTING GENOTYPE FILTERS
    //
    //
    // combinations of explicitly unfiltered genotypes and genotypes with one or two filters
    addGenotypeTests(
        site,
        new GenotypeBuilder("g1-x", Arrays.asList(ref, ref)).filters("X").make(),
        new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
    addGenotypeTests(
        site,
        new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
        new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
    addGenotypeTests(
        site,
        new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
        new GenotypeBuilder("g2-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
    addGenotypeTests(
        site,
        new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
        new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make(),
        new GenotypeBuilder("g3-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
  }