private VariantContext getDbsnp(String rsID) { if (dbsnpIterator == null) { if (dbsnp == null) throw new UserException.BadInput( "No dbSNP rod was provided, but one is needed to decipher the correct indel alleles from the HapMap records"); RMDTrackBuilder builder = new RMDTrackBuilder( getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), getToolkit().getGenomeLocParser(), getToolkit().getArguments().unsafe, getToolkit().getArguments().disableAutoIndexCreationAndLockingWhenReadingRods, null); dbsnpIterator = builder .createInstanceOfTrack(VCFCodec.class, new File(dbsnp.dbsnp.getSource())) .getIterator(); // Note that we should really use some sort of seekable iterator here so that the search // doesn't take forever // (but it's complicated because the hapmap location doesn't match the dbsnp location, so we // don't know where to seek to) } while (dbsnpIterator.hasNext()) { GATKFeature feature = dbsnpIterator.next(); VariantContext vc = (VariantContext) feature.getUnderlyingObject(); if (vc.getID().equals(rsID)) return vc; } return null; }
/**
 * Bundles a header with the records that belong to it.
 *
 * <p>If any record carries genotypes, the stored header is rebuilt from the supplied header's
 * sorted metadata plus the union of the records' sample names; otherwise the supplied header is
 * stored unchanged.
 *
 * @param header the header to use (or to derive the stored header from)
 * @param vcs the records; the list is stored as-is, not copied
 */
public VariantContextTestData(final VCFHeader header, final List<VariantContext> vcs) {
  final Set<String> sampleNames = new HashSet<String>();
  for (final VariantContext record : vcs) {
    if (record.hasGenotypes()) {
      sampleNames.addAll(record.getSampleNames());
    }
  }

  if (sampleNames.isEmpty()) {
    this.header = header;
  } else {
    this.header = new VCFHeader(header.getMetaDataInSortedOrder(), sampleNames);
  }
  this.vcs = vcs;
}
/**
 * Decodes the next source element with {@code codec} and fully decodes the result against
 * {@code header}.
 *
 * @return the fully decoded record, or {@code null} when the codec yields {@code null}
 * @throws RuntimeException wrapping any {@link IOException} raised while decoding
 */
@Override
public VariantContext next() {
  try {
    final VariantContext decoded = codec.decode(nextSource());
    if (decoded == null) {
      return null;
    }
    return decoded.fullyDecode(header, false);
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
}
private Collection<VariantContext> getVariantContexts( RefMetaDataTracker tracker, ReferenceContext ref) { List<Feature> features = tracker.getValues(variants, ref.getLocus()); List<VariantContext> VCs = new ArrayList<VariantContext>(features.size()); for (Feature record : features) { if (VariantContextAdaptors.canBeConvertedToVariantContext(record)) { // we need to special case the HapMap format because indels aren't handled correctly if (record instanceof RawHapMapFeature) { // is it an indel? RawHapMapFeature hapmap = (RawHapMapFeature) record; if (hapmap.getAlleles()[0].equals(RawHapMapFeature.NULL_ALLELE_STRING) || hapmap.getAlleles()[1].equals(RawHapMapFeature.NULL_ALLELE_STRING)) { // get the dbsnp object corresponding to this record (needed to help us distinguish // between insertions and deletions) VariantContext dbsnpVC = getDbsnp(hapmap.getName()); if (dbsnpVC == null || dbsnpVC.isMixed()) continue; Map<String, Allele> alleleMap = new HashMap<String, Allele>(2); alleleMap.put( RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion())); alleleMap.put( RawHapMapFeature.INSERTION, Allele.create( (char) ref.getBase() + ((RawHapMapFeature) record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); hapmap.setActualAlleles(alleleMap); // also, use the correct positioning for insertions hapmap.updatePosition(dbsnpVC.getStart()); if (hapmap.getStart() < ref.getWindow().getStart()) { logger.warn( "Hapmap record at " + ref.getLocus() + " represents an indel too large to be converted; skipping..."); continue; } } } // ok, we might actually be able to turn this record in a variant context VariantContext vc = VariantContextAdaptors.toVariantContext(variants.getName(), record, ref); if (vc != null) // sometimes the track has odd stuff in it that can't be converted VCs.add(vc); } } return VCs; }
/**
 * Adds one test data set per file in {@code testSourceVCFs}, built from that file's fully decoded
 * records. Does nothing unless {@code ENABLE_SOURCE_VCF_TESTS} is set.
 *
 * @throws IOException if reading a source file fails
 */
private static void makeEmpiricalTests() throws IOException {
  if (!ENABLE_SOURCE_VCF_TESTS) {
    return;
  }

  for (final File sourceFile : testSourceVCFs) {
    final VCFCodec vcfCodec = new VCFCodec();
    final VariantContextContainer container = readAllVCs(sourceFile, vcfCodec);

    final List<VariantContext> decodedRecords = new ArrayList<VariantContext>();
    for (final VariantContext undecoded : container.getVCs()) {
      if (undecoded == null) {
        continue; // null records are dropped
      }
      decodedRecords.add(undecoded.fullyDecode(container.getHeader(), false));
    }

    TEST_DATAs.add(new VariantContextTestData(container.getHeader(), decodedRecords));
  }
}
/**
 * Annotates {@code vc} with a fresh {@code QualByDepth} and checks the resulting "QD" value.
 *
 * @param vc the variant context to annotate
 * @param expectedQD the expected QD value, compared with a tolerance of 0.0001
 */
@Test(dataProvider = "UsingAD")
public void testUsingAD(final VariantContext vc, final double expectedQD) {
  final QualByDepth annotation = new QualByDepth();
  final Map<String, Object> annotations = annotation.annotate(null, null, null, null, vc, null);
  Assert.assertNotNull(annotations, vc.toString());

  // the annotation value is cast to String and parsed before the numeric comparison
  final Object rawValue = annotations.get("QD");
  final String qdText = (String) rawValue;
  Assert.assertEquals(Double.valueOf(qdText), expectedQD, 0.0001);
}
/**
 * Describes this data set using its first record.
 *
 * <p>The first record is printed without genotypes, followed either by each genotype (when it has
 * fewer than five samples) or by the sample count, and finally by a note on how many further
 * records the set contains. A data set with no records is reported as such instead of failing on
 * the lookup of the first record.
 *
 * @return a one-line description of this data set
 */
@Override
public String toString() {
  final StringBuilder b = new StringBuilder();
  b.append("VariantContextTestData: [");

  if (vcs.isEmpty()) {
    // nothing to summarise; vcs.get(0) would throw IndexOutOfBoundsException here
    b.append("no VariantContext records");
  } else {
    final VariantContext vc = vcs.get(0);

    // print the site-level fields without the (potentially long) genotype block
    final VariantContextBuilder builder = new VariantContextBuilder(vc);
    builder.noGenotypes();
    b.append(builder.make().toString());

    if (vc.getNSamples() < 5) {
      for (final Genotype g : vc.getGenotypes()) {
        b.append(g.toString());
      }
    } else {
      b.append(" nGenotypes = ").append(vc.getNSamples());
    }

    if (vcs.size() > 1) {
      b.append(" ----- with another ").append(vcs.size() - 1).append(" VariantContext records");
    }
  }

  b.append("]");
  return b.toString();
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker == null || !BaseUtils.isRegularBase(ref.getBase())) return 0; Collection<VariantContext> contexts = getVariantContexts(tracker, ref); for (VariantContext vc : contexts) { VariantContextBuilder builder = new VariantContextBuilder(vc); // set the appropriate sample name if necessary if (sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName())) { Genotype g = new GenotypeBuilder(vc.getGenotype(variants.getName())).name(sampleName).make(); builder.genotypes(g); } final VariantContext withID = variantOverlapAnnotator.annotateRsID(tracker, builder.make()); writeRecord(withID, tracker, ref.getLocus()); } return 1; }
private static void addGenotypesAndGTests() { // for ( final int ploidy : Arrays.asList(2)) { for (final int ploidy : Arrays.asList(1, 2, 3, 4, 5)) { final List<List<String>> alleleCombinations = Arrays.asList( Arrays.asList("A"), Arrays.asList("A", "C"), Arrays.asList("A", "C", "G"), Arrays.asList("A", "C", "G", "T")); for (final List<String> alleles : alleleCombinations) { final VariantContextBuilder vcb = builder().alleles(alleles); final VariantContext site = vcb.make(); final int nAlleles = site.getNAlleles(); final Allele ref = site.getReference(); // base genotype is ref/.../ref up to ploidy final List<Allele> baseGenotype = new ArrayList<Allele>(ploidy); for (int i = 0; i < ploidy; i++) baseGenotype.add(ref); final int nPLs = GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy); // ada is 0, 1, ..., nAlleles - 1 final List<Integer> ada = new ArrayList<Integer>(nAlleles); for (int i = 0; i < nAlleles - 1; i++) ada.add(i); // pl is 0, 1, ..., up to nPLs (complex calc of nAlleles and ploidy) final int[] pl = new int[nPLs]; for (int i = 0; i < pl.length; i++) pl[i] = i; final GenotypeBuilder gb = new GenotypeBuilder("ADA_PL_SAMPLE"); gb.alleles(baseGenotype); gb.PL(pl); gb.attribute("ADA", nAlleles == 2 ? ada.get(0) : ada); vcb.genotypes(gb.make()); add(vcb); } } }
private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) { if (!wroteHeader) { wroteHeader = true; // setup the header fields Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName()))); hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY)); allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY); for (VCFHeaderLine field : hInfo) { if (field instanceof VCFFormatHeaderLine) { allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine) field).getID()); } } samples = new LinkedHashSet<String>(); if (sampleName != null) { samples.add(sampleName); } else { // try VCF first samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); if (samples.isEmpty()) { List<Feature> features = tracker.getValues(variants, loc); if (features.size() == 0) throw new IllegalStateException( "No rod data is present, but we just created a VariantContext"); Feature f = features.get(0); if (f instanceof RawHapMapFeature) samples.addAll(Arrays.asList(((RawHapMapFeature) f).getSampleIDs())); else samples.addAll(vc.getSampleNames()); } } vcfwriter.writeHeader(new VCFHeader(hInfo, samples)); } vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings); vcfwriter.add(vc); }
public static void testReaderWriterWithMissingGenotypes( final VariantContextIOTest tester, final VariantContextTestData data) throws IOException { final int nSamples = data.header.getNGenotypeSamples(); if (nSamples > 2) { for (final VariantContext vc : data.vcs) if (vc.isSymbolic()) // cannot handle symbolic alleles because they may be weird non-call VCFs return; final File tmpFile = File.createTempFile("testReaderWriter", tester.getExtension()); tmpFile.deleteOnExit(); // write expected to disk final EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY); final VariantContextWriter writer = tester.makeWriter(tmpFile, options); final Set<String> samplesInVCF = new HashSet<String>(data.header.getGenotypeSamples()); final List<String> missingSamples = Arrays.asList("MISSING1", "MISSING2"); final List<String> allSamples = new ArrayList<String>(missingSamples); allSamples.addAll(samplesInVCF); final VCFHeader header = new VCFHeader(data.header.getMetaDataInInputOrder(), allSamples); writeVCsToFile(writer, header, data.vcs); // ensure writing of expected == actual final VariantContextContainer p = tester.readAllVCs(tmpFile); final Iterable<VariantContext> actual = p.getVCs(); int i = 0; for (final VariantContext readVC : actual) { if (readVC == null) continue; // sometimes we read null records... final VariantContext expected = data.vcs.get(i++); for (final Genotype g : readVC.getGenotypes()) { Assert.assertTrue(allSamples.contains(g.getSampleName())); if (samplesInVCF.contains(g.getSampleName())) { assertEquals(g, expected.getGenotype(g.getSampleName())); } else { // missing Assert.assertTrue(g.isNoCall()); } } } } }
/**
 * Asserts that two variant contexts agree field by field: chromosome, start, end, ID, alleles,
 * attributes, filter state, filters, phred-scaled quality and, when the expected context has
 * genotypes, the sample names and every per-sample genotype.
 *
 * @param actual the context under test; must not be {@code null}
 * @param expected the context holding the expected values
 */
public static void assertEquals(final VariantContext actual, final VariantContext expected) {
  Assert.assertNotNull(actual, "VariantContext expected not null");

  // location and identity
  Assert.assertEquals(actual.getChr(), expected.getChr(), "chr");
  Assert.assertEquals(actual.getStart(), expected.getStart(), "start");
  Assert.assertEquals(actual.getEnd(), expected.getEnd(), "end");
  Assert.assertEquals(actual.getID(), expected.getID(), "id");
  Assert.assertEquals(
      actual.getAlleles(), expected.getAlleles(), "alleles for " + expected + " vs " + actual);

  // attributes, filters and quality
  assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
  Assert.assertEquals(
      actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied");
  Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "isFiltered");
  VariantBaseTest.assertEqualsSet(actual.getFilters(), expected.getFilters(), "filters");
  VariantBaseTest.assertEqualsDoubleSmart(
      actual.getPhredScaledQual(), expected.getPhredScaledQual());

  // genotypes
  Assert.assertEquals(actual.hasGenotypes(), expected.hasGenotypes(), "hasGenotypes");
  if (!expected.hasGenotypes()) {
    return;
  }

  VariantBaseTest.assertEqualsSet(
      actual.getSampleNames(), expected.getSampleNames(), "sample names set");
  Assert.assertEquals(
      actual.getSampleNamesOrderedByName(),
      expected.getSampleNamesOrderedByName(),
      "sample names");

  final Set<String> expectedSamples = expected.getSampleNames();
  for (final String sampleName : expectedSamples) {
    assertEquals(actual.getGenotype(sampleName), expected.getGenotype(sampleName));
  }
}
/**
 * Registers genotype test cases for {@code site} by calling {@code addGenotypeTests} with a fixed
 * sequence of genotype combinations: basic hom-ref / het / hom-var, a genotype with no alleles,
 * mixed ploidy, phasing, PLs, integer / float / string attributes, and genotype filters.
 *
 * <p>Several groups are only added when the corresponding {@code ENABLE_*} flag is set, and the
 * groups that need an alternate allele are only added when the site has more than one allele.
 *
 * @param site the site whose reference (and first alternate, if any) allele the genotypes use
 */
private static void addGenotypes(final VariantContext site) {
  // test ref/ref
  final Allele ref = site.getReference();
  // alt1 is null when the site has only the reference allele
  final Allele alt1 = site.getNAlleles() > 1 ? site.getAlternateAllele(0) : null;
  final Genotype homRef = GenotypeBuilder.create("homRef", Arrays.asList(ref, ref));
  addGenotypeTests(site, homRef);

  // everything in this branch needs an alternate allele
  if (alt1 != null) {
    final Genotype het = GenotypeBuilder.create("het", Arrays.asList(ref, alt1));
    final Genotype homVar = GenotypeBuilder.create("homVar", Arrays.asList(alt1, alt1));
    addGenotypeTests(site, homRef, het);
    addGenotypeTests(site, homRef, het, homVar);

    // test no GT at all
    addGenotypeTests(
        site,
        new GenotypeBuilder("noGT", new ArrayList<Allele>(0)).attribute("INT1", 10).make());

    final List<Allele> noCall = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);

    // ploidy
    if (ENABLE_PLOIDY_TESTS) {
      addGenotypeTests(
          site,
          GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
          GenotypeBuilder.create("hap", Arrays.asList(ref)));

      addGenotypeTests(
          site,
          GenotypeBuilder.create("noCall", noCall),
          GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
          GenotypeBuilder.create("hap", Arrays.asList(ref)));

      addGenotypeTests(
          site,
          GenotypeBuilder.create("noCall", noCall),
          GenotypeBuilder.create("noCall2", noCall),
          GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
          GenotypeBuilder.create("hap", Arrays.asList(ref)));

      // NOTE(review): the "tet" genotypes below are built from three alleles, not four —
      // confirm whether that is intended
      addGenotypeTests(
          site,
          GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
          GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));

      addGenotypeTests(
          site,
          GenotypeBuilder.create("noCall", noCall),
          GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
          GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));

      addGenotypeTests(
          site,
          GenotypeBuilder.create("noCall", noCall),
          GenotypeBuilder.create("noCall2", noCall),
          GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
          GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));

      // same shape as the noCall/dip/tet case above, but the first sample is named "nocall"
      // (lower-case c)
      addGenotypeTests(
          site,
          GenotypeBuilder.create("nocall", noCall),
          GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
          GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));
    }

    //
    //
    // TESTING PHASE
    //
    //
    final Genotype gUnphased = new GenotypeBuilder("gUnphased", Arrays.asList(ref, alt1)).make();
    final Genotype gPhased =
        new GenotypeBuilder("gPhased", Arrays.asList(ref, alt1)).phased(true).make();
    final Genotype gPhased2 =
        new GenotypeBuilder("gPhased2", Arrays.asList(alt1, alt1)).phased(true).make();
    final Genotype gPhased3 =
        new GenotypeBuilder("gPhased3", Arrays.asList(ref, ref)).phased(true).make();
    final Genotype haploidNoPhase =
        new GenotypeBuilder("haploidNoPhase", Arrays.asList(ref)).make();

    // unphased paired with each phased genotype
    addGenotypeTests(site, gUnphased, gPhased);
    addGenotypeTests(site, gUnphased, gPhased2);
    addGenotypeTests(site, gUnphased, gPhased3);
    // pairs of phased genotypes
    addGenotypeTests(site, gPhased, gPhased2);
    addGenotypeTests(site, gPhased, gPhased3);
    addGenotypeTests(site, gPhased2, gPhased3);
    // single-allele genotype combined with one, two and three phased genotypes
    addGenotypeTests(site, haploidNoPhase, gPhased);
    addGenotypeTests(site, haploidNoPhase, gPhased2);
    addGenotypeTests(site, haploidNoPhase, gPhased3);
    addGenotypeTests(site, haploidNoPhase, gPhased, gPhased2);
    addGenotypeTests(site, haploidNoPhase, gPhased, gPhased3);
    addGenotypeTests(site, haploidNoPhase, gPhased2, gPhased3);
    addGenotypeTests(site, haploidNoPhase, gPhased, gPhased2, gPhased3);

    // four-allele genotypes, one unphased and one phased
    final Genotype gUnphasedTet =
        new GenotypeBuilder("gUnphasedTet", Arrays.asList(ref, alt1, ref, alt1)).make();
    final Genotype gPhasedTet =
        new GenotypeBuilder("gPhasedTet", Arrays.asList(ref, alt1, alt1, alt1))
            .phased(true)
            .make();
    addGenotypeTests(site, gUnphasedTet, gPhasedTet);
  }

  if (ENABLE_PL_TESTS) {
    if (site.getNAlleles() == 2) {
      // testing PLs
      addGenotypeTests(
          site,
          GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {0, -1, -2}),
          GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[] {0, -2, -3}));

      addGenotypeTests(
          site,
          GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {-1, 0, -2}),
          GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[] {0, -2, -3}));

      addGenotypeTests(
          site,
          GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {-1, 0, -2}),
          GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[] {0, -2000, -1000}));

      addGenotypeTests(
          site, // missing PLs
          GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {-1, 0, -2}),
          GenotypeBuilder.create("g2", Arrays.asList(ref, ref)));
    } else if (site.getNAlleles() == 3) {
      // testing PLs
      addGenotypeTests(
          site,
          GenotypeBuilder.create(
              "g1", Arrays.asList(ref, ref), new double[] {0, -1, -2, -3, -4, -5}),
          GenotypeBuilder.create(
              "g2", Arrays.asList(ref, ref), new double[] {0, -2, -3, -4, -5, -6}));
    }
  }

  // test attributes
  // each group pairs a fully populated case with one where g2 is given no values
  addGenotypeTests(site, attr("g1", ref, "INT1", 1), attr("g2", ref, "INT1", 2));
  addGenotypeTests(site, attr("g1", ref, "INT1", 1), attr("g2", ref, "INT1"));
  addGenotypeTests(site, attr("g1", ref, "INT3", 1, 2, 3), attr("g2", ref, "INT3", 4, 5, 6));
  addGenotypeTests(site, attr("g1", ref, "INT3", 1, 2, 3), attr("g2", ref, "INT3"));
  addGenotypeTests(
      site, attr("g1", ref, "INT20", TWENTY_INTS), attr("g2", ref, "INT20", TWENTY_INTS));

  if (ENABLE_VARARRAY_TESTS) {
    // a different number of values per sample under the same key
    addGenotypeTests(
        site,
        attr("g1", ref, "INT.VAR", 1, 2, 3),
        attr("g2", ref, "INT.VAR", 4, 5),
        attr("g3", ref, "INT.VAR", 6));
    addGenotypeTests(
        site,
        attr("g1", ref, "INT.VAR", 1, 2, 3),
        attr("g2", ref, "INT.VAR"),
        attr("g3", ref, "INT.VAR", 5));
  }

  addGenotypeTests(site, attr("g1", ref, "FLOAT1", 1.0), attr("g2", ref, "FLOAT1", 2.0));
  addGenotypeTests(site, attr("g1", ref, "FLOAT1", 1.0), attr("g2", ref, "FLOAT1"));
  addGenotypeTests(
      site, attr("g1", ref, "FLOAT3", 1.0, 2.0, 3.0), attr("g2", ref, "FLOAT3", 4.0, 5.0, 6.0));
  addGenotypeTests(site, attr("g1", ref, "FLOAT3", 1.0, 2.0, 3.0), attr("g2", ref, "FLOAT3"));

  if (ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS) {
    //
    //
    // TESTING MULTIPLE SIZED LISTS IN THE GENOTYPE FIELD
    //
    //
    addGenotypeTests(
        site,
        attr("g1", ref, "GS", Arrays.asList("S1", "S2")),
        attr("g2", ref, "GS", Arrays.asList("S3", "S4")));

    addGenotypeTests(
        site, // g1 is missing the string, and g2 is missing FLOAT1
        attr("g1", ref, "FLOAT1", 1.0),
        attr("g2", ref, "GS", Arrays.asList("S3", "S4")));

    // variable sized lists
    addGenotypeTests(
        site, attr("g1", ref, "GV", "S1"), attr("g2", ref, "GV", Arrays.asList("S3", "S4")));

    addGenotypeTests(
        site,
        attr("g1", ref, "GV", Arrays.asList("S1", "S2")),
        attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));

    addGenotypeTests(
        site, // missing value in varlist of string
        attr("g1", ref, "FLOAT1", 1.0),
        attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));
  }

  //
  //
  // TESTING GENOTYPE FILTERS
  //
  //
  // both samples filtered with the same filter
  addGenotypeTests(
      site,
      new GenotypeBuilder("g1-x", Arrays.asList(ref, ref)).filters("X").make(),
      new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
  // one unfiltered sample next to a sample with one filter
  addGenotypeTests(
      site,
      new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
      new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
  // one unfiltered sample next to a sample with two filters
  addGenotypeTests(
      site,
      new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
      new GenotypeBuilder("g2-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
  // unfiltered, one filter and two filters together
  addGenotypeTests(
      site,
      new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
      new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make(),
      new GenotypeBuilder("g3-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
}