public static void addComplexGenotypesTest() { final List<Allele> allAlleles = Arrays.asList( Allele.create("A", true), Allele.create("C", false), Allele.create("G", false)); for (int nAlleles : Arrays.asList(2, 3)) { for (int highestPloidy : Arrays.asList(1, 2, 3)) { // site alleles final List<Allele> siteAlleles = allAlleles.subList(0, nAlleles); // possible alleles for genotypes final List<Allele> possibleGenotypeAlleles = new ArrayList<Allele>(siteAlleles); possibleGenotypeAlleles.add(Allele.NO_CALL); // there are n^ploidy possible genotypes final List<List<Allele>> possibleGenotypes = makeAllGenotypes(possibleGenotypeAlleles, highestPloidy); final int nPossibleGenotypes = possibleGenotypes.size(); VariantContextBuilder vb = new VariantContextBuilder("unittest", "1", 1, 1, siteAlleles); // first test -- create n copies of each genotype for (int i = 0; i < nPossibleGenotypes; i++) { final List<Genotype> samples = new ArrayList<Genotype>(3); samples.add(GenotypeBuilder.create("sample" + i, possibleGenotypes.get(i))); add(vb.genotypes(samples)); } // second test -- create one sample with each genotype { final List<Genotype> samples = new ArrayList<Genotype>(nPossibleGenotypes); for (int i = 0; i < nPossibleGenotypes; i++) { samples.add(GenotypeBuilder.create("sample" + i, possibleGenotypes.get(i))); } add(vb.genotypes(samples)); } // test mixed ploidy for (int i = 0; i < nPossibleGenotypes; i++) { for (int ploidy = 1; ploidy < highestPloidy; ploidy++) { final List<Genotype> samples = new ArrayList<Genotype>(highestPloidy); final List<Allele> genotype = possibleGenotypes.get(i).subList(0, ploidy); samples.add(GenotypeBuilder.create("sample" + i, genotype)); add(vb.genotypes(samples)); } } } } }
/**
 * Builds a ref/ref genotype for {@code name}, optionally carrying one attribute.
 *
 * @param name the sample name
 * @param ref the allele used for both genotype alleles
 * @param key the attribute key; unused when no values are supplied
 * @param value the attribute value(s): none leaves the attribute off, exactly one stores that
 *     object as-is, several store them as a list
 * @return the constructed genotype
 */
private static Genotype attr(
    final String name, final Allele ref, final String key, final Object... value) {
  final List<Allele> homRefAlleles = Arrays.asList(ref, ref);

  // No values: plain genotype without the attribute.
  if (value.length == 0) {
    return GenotypeBuilder.create(name, homRefAlleles);
  }

  // A lone value is stored unwrapped; multiple values are stored as a list.
  final Object payload;
  if (value.length == 1) {
    payload = value[0];
  } else {
    payload = Arrays.asList(value);
  }
  return new GenotypeBuilder(name, homRefAlleles).attribute(key, payload).make();
}
/**
 * Writes one VCF record summarising the statistics of a single interval.
 *
 * <p>The record spans the interval, lists {@code refAllele} followed by
 * {@code SYMBOLIC_ALLELE}, and has one genotype per entry in {@code samples}. Each genotype
 * carries that sample's coverage attributes and uses its callable statuses as filters. The
 * interval-level statuses become the site filters, and the finished record is passed to
 * {@code vcfWriter}.
 *
 * @param stats The statistics of the interval
 * @param refAllele the reference allele
 */
private void outputStatsToVCF(final IntervalStratification stats, final Allele refAllele) {
  final GenomeLoc interval = stats.getInterval();

  // One genotype per sample, holding its per-sample coverage summary.
  final ArrayList<Genotype> genotypes = new ArrayList<>();
  for (final String sample : samples) {
    final SampleStratification sampleStat = stats.getSampleStatistics(sample);
    final GenotypeBuilder gb = new GenotypeBuilder(sample);
    gb.attribute(
        GATKVCFConstants.AVG_INTERVAL_DP_BY_SAMPLE_KEY,
        sampleStat.averageCoverage(interval.size()));
    gb.attribute(GATKVCFConstants.LOW_COVERAGE_LOCI, sampleStat.getNLowCoveredLoci());
    gb.attribute(GATKVCFConstants.ZERO_COVERAGE_LOCI, sampleStat.getNUncoveredLoci());
    // NOTE(review): looks up the same sample's statistics a second time; presumably
    // sampleStat could be reused here -- confirm getSampleStatistics has no side effects.
    gb.filters(statusToStrings(stats.getSampleStatistics(sample).callableStatuses(), false));
    genotypes.add(gb.make());
  }

  // Site alleles: the reference allele followed by the symbolic allele.
  final List<Allele> alleles = new ArrayList<>();
  alleles.add(refAllele);
  alleles.add(SYMBOLIC_ALLELE);

  final VariantContextBuilder vcb =
      new VariantContextBuilder(
              "DiagnoseTargets",
              interval.getContig(),
              interval.getStart(),
              interval.getStop(),
              alleles)
          .log10PError(VariantContext.NO_LOG10_PERROR);
  vcb.filters(new LinkedHashSet<>(statusToStrings(stats.callableStatuses(), true)));

  // Interval-level INFO attributes.
  final Map<String, Object> attributes = new HashMap<>();
  attributes.put(VCFConstants.END_KEY, interval.getStop());
  attributes.put(GATKVCFConstants.AVG_INTERVAL_DP_KEY, stats.averageCoverage(interval.size()));
  attributes.put(GATKVCFConstants.INTERVAL_GC_CONTENT_KEY, stats.gcContent());

  vcfWriter.add(vcb.attributes(attributes).genotypes(genotypes).make());
}
private static void addGenotypesAndGTests() { // for ( final int ploidy : Arrays.asList(2)) { for (final int ploidy : Arrays.asList(1, 2, 3, 4, 5)) { final List<List<String>> alleleCombinations = Arrays.asList( Arrays.asList("A"), Arrays.asList("A", "C"), Arrays.asList("A", "C", "G"), Arrays.asList("A", "C", "G", "T")); for (final List<String> alleles : alleleCombinations) { final VariantContextBuilder vcb = builder().alleles(alleles); final VariantContext site = vcb.make(); final int nAlleles = site.getNAlleles(); final Allele ref = site.getReference(); // base genotype is ref/.../ref up to ploidy final List<Allele> baseGenotype = new ArrayList<Allele>(ploidy); for (int i = 0; i < ploidy; i++) baseGenotype.add(ref); final int nPLs = GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy); // ada is 0, 1, ..., nAlleles - 1 final List<Integer> ada = new ArrayList<Integer>(nAlleles); for (int i = 0; i < nAlleles - 1; i++) ada.add(i); // pl is 0, 1, ..., up to nPLs (complex calc of nAlleles and ploidy) final int[] pl = new int[nPLs]; for (int i = 0; i < pl.length; i++) pl[i] = i; final GenotypeBuilder gb = new GenotypeBuilder("ADA_PL_SAMPLE"); gb.alleles(baseGenotype); gb.PL(pl); gb.attribute("ADA", nAlleles == 2 ? ada.get(0) : ada); vcb.genotypes(gb.make()); add(vcb); } } }
/**
 * Registers genotype-level test cases for {@code site} by calling {@code addGenotypeTests} with
 * a fixed sequence of genotype combinations.
 *
 * <p>The cases are grouped as follows: basic zygosity, mixed ploidy, phasing, PLs, per-genotype
 * attributes, and genotype filters. Groups that need an alternate allele only run when the site
 * has more than one allele; several groups are additionally switched by {@code ENABLE_*} flags.
 * The order of registrations is the order in which the calls appear below.
 *
 * @param site the site whose reference (and first alternate, if any) allele the genotypes use
 */
private static void addGenotypes(final VariantContext site) {
  // test ref/ref
  final Allele ref = site.getReference();
  // alt1 is null when the site has no alternate allele; the alt-dependent cases are skipped then
  final Allele alt1 = site.getNAlleles() > 1 ? site.getAlternateAllele(0) : null;
  final Genotype homRef = GenotypeBuilder.create("homRef", Arrays.asList(ref, ref));
  addGenotypeTests(site, homRef);

  if (alt1 != null) {
    // basic zygosity: hom-ref, het and hom-var samples in growing combinations
    final Genotype het = GenotypeBuilder.create("het", Arrays.asList(ref, alt1));
    final Genotype homVar = GenotypeBuilder.create("homVar", Arrays.asList(alt1, alt1));
    addGenotypeTests(site, homRef, het);
    addGenotypeTests(site, homRef, het, homVar);

    // test no GT at all: empty allele list, only an INT1 attribute
    addGenotypeTests(
        site, new GenotypeBuilder("noGT", new ArrayList<Allele>(0)).attribute("INT1", 10).make());

    final List<Allele> noCall = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);

    // ploidy: samples with different numbers of alleles at the same site
    if (ENABLE_PLOIDY_TESTS) {
      // diploid + haploid, then the same with one and two leading no-call samples
      addGenotypeTests(
          site,
          GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
          GenotypeBuilder.create("hap", Arrays.asList(ref)));
      addGenotypeTests(
          site,
          GenotypeBuilder.create("noCall", noCall),
          GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
          GenotypeBuilder.create("hap", Arrays.asList(ref)));
      addGenotypeTests(
          site,
          GenotypeBuilder.create("noCall", noCall),
          GenotypeBuilder.create("noCall2", noCall),
          GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
          GenotypeBuilder.create("hap", Arrays.asList(ref)));
      // NOTE(review): the sample named "tet" below is built from three alleles
      // (ref, alt1, alt1), not four
      addGenotypeTests(
          site,
          GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
          GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));
      addGenotypeTests(
          site,
          GenotypeBuilder.create("noCall", noCall),
          GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
          GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));
      addGenotypeTests(
          site,
          GenotypeBuilder.create("noCall", noCall),
          GenotypeBuilder.create("noCall2", noCall),
          GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
          GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));
      // same shape as the noCall/dip/tet case above, but the sample name is lower-case "nocall"
      addGenotypeTests(
          site,
          GenotypeBuilder.create("nocall", noCall),
          GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
          GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));
    }

    //
    // TESTING PHASE
    //
    // One unphased het, three phased diploids (het, hom-var, hom-ref) and an unphased haploid,
    // registered in pairs and larger groups.
    final Genotype gUnphased = new GenotypeBuilder("gUnphased", Arrays.asList(ref, alt1)).make();
    final Genotype gPhased =
        new GenotypeBuilder("gPhased", Arrays.asList(ref, alt1)).phased(true).make();
    final Genotype gPhased2 =
        new GenotypeBuilder("gPhased2", Arrays.asList(alt1, alt1)).phased(true).make();
    final Genotype gPhased3 =
        new GenotypeBuilder("gPhased3", Arrays.asList(ref, ref)).phased(true).make();
    final Genotype haploidNoPhase =
        new GenotypeBuilder("haploidNoPhase", Arrays.asList(ref)).make();
    addGenotypeTests(site, gUnphased, gPhased);
    addGenotypeTests(site, gUnphased, gPhased2);
    addGenotypeTests(site, gUnphased, gPhased3);
    addGenotypeTests(site, gPhased, gPhased2);
    addGenotypeTests(site, gPhased, gPhased3);
    addGenotypeTests(site, gPhased2, gPhased3);
    addGenotypeTests(site, haploidNoPhase, gPhased);
    addGenotypeTests(site, haploidNoPhase, gPhased2);
    addGenotypeTests(site, haploidNoPhase, gPhased3);
    addGenotypeTests(site, haploidNoPhase, gPhased, gPhased2);
    addGenotypeTests(site, haploidNoPhase, gPhased, gPhased3);
    addGenotypeTests(site, haploidNoPhase, gPhased2, gPhased3);
    addGenotypeTests(site, haploidNoPhase, gPhased, gPhased2, gPhased3);

    // four-allele genotypes, one unphased and one phased
    final Genotype gUnphasedTet =
        new GenotypeBuilder("gUnphasedTet", Arrays.asList(ref, alt1, ref, alt1)).make();
    final Genotype gPhasedTet =
        new GenotypeBuilder("gPhasedTet", Arrays.asList(ref, alt1, alt1, alt1))
            .phased(true)
            .make();
    addGenotypeTests(site, gUnphasedTet, gPhasedTet);
  }

  // PL cases use ref/ref genotypes only, so they sit outside the alt1 check; the length of each
  // likelihood array is chosen by the number of site alleles (3 values for 2 alleles, 6 for 3)
  if (ENABLE_PL_TESTS) {
    if (site.getNAlleles() == 2) {
      // testing PLs
      addGenotypeTests(
          site,
          GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {0, -1, -2}),
          GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[] {0, -2, -3}));
      addGenotypeTests(
          site,
          GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {-1, 0, -2}),
          GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[] {0, -2, -3}));
      // large-magnitude likelihood values
      addGenotypeTests(
          site,
          GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {-1, 0, -2}),
          GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[] {0, -2000, -1000}));
      addGenotypeTests(
          site, // missing PLs: g2 is created without a likelihood array
          GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {-1, 0, -2}),
          GenotypeBuilder.create("g2", Arrays.asList(ref, ref)));
    } else if (site.getNAlleles() == 3) {
      // testing PLs
      addGenotypeTests(
          site,
          GenotypeBuilder.create(
              "g1", Arrays.asList(ref, ref), new double[] {0, -1, -2, -3, -4, -5}),
          GenotypeBuilder.create(
              "g2", Arrays.asList(ref, ref), new double[] {0, -2, -3, -4, -5, -6}));
    }
  }

  // test attributes: each key is registered once with values on both samples and once with
  // the second sample built by attr(...) without values
  addGenotypeTests(site, attr("g1", ref, "INT1", 1), attr("g2", ref, "INT1", 2));
  addGenotypeTests(site, attr("g1", ref, "INT1", 1), attr("g2", ref, "INT1"));
  addGenotypeTests(site, attr("g1", ref, "INT3", 1, 2, 3), attr("g2", ref, "INT3", 4, 5, 6));
  addGenotypeTests(site, attr("g1", ref, "INT3", 1, 2, 3), attr("g2", ref, "INT3"));
  // TWENTY_INTS is defined outside this method; presumably a 20-element collection -- confirm
  addGenotypeTests(
      site, attr("g1", ref, "INT20", TWENTY_INTS), attr("g2", ref, "INT20", TWENTY_INTS));

  // integer attribute whose number of values differs between samples
  if (ENABLE_VARARRAY_TESTS) {
    addGenotypeTests(
        site,
        attr("g1", ref, "INT.VAR", 1, 2, 3),
        attr("g2", ref, "INT.VAR", 4, 5),
        attr("g3", ref, "INT.VAR", 6));
    addGenotypeTests(
        site,
        attr("g1", ref, "INT.VAR", 1, 2, 3),
        attr("g2", ref, "INT.VAR"),
        attr("g3", ref, "INT.VAR", 5));
  }

  addGenotypeTests(site, attr("g1", ref, "FLOAT1", 1.0), attr("g2", ref, "FLOAT1", 2.0));
  addGenotypeTests(site, attr("g1", ref, "FLOAT1", 1.0), attr("g2", ref, "FLOAT1"));
  addGenotypeTests(
      site, attr("g1", ref, "FLOAT3", 1.0, 2.0, 3.0), attr("g2", ref, "FLOAT3", 4.0, 5.0, 6.0));
  addGenotypeTests(site, attr("g1", ref, "FLOAT3", 1.0, 2.0, 3.0), attr("g2", ref, "FLOAT3"));

  if (ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS) {
    //
    // TESTING MULTIPLE SIZED LISTS IN THE GENOTYPE FIELD
    //
    // "GS" is given two strings per sample
    addGenotypeTests(
        site,
        attr("g1", ref, "GS", Arrays.asList("S1", "S2")),
        attr("g2", ref, "GS", Arrays.asList("S3", "S4")));

    addGenotypeTests(
        site, // g1 is missing the string, and g2 is missing FLOAT1
        attr("g1", ref, "FLOAT1", 1.0),
        attr("g2", ref, "GS", Arrays.asList("S3", "S4")));

    // variable sized lists: "GV" carries a different number of strings per sample
    addGenotypeTests(
        site, attr("g1", ref, "GV", "S1"), attr("g2", ref, "GV", Arrays.asList("S3", "S4")));
    addGenotypeTests(
        site,
        attr("g1", ref, "GV", Arrays.asList("S1", "S2")),
        attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));

    addGenotypeTests(
        site, // missing value in varlist of string
        attr("g1", ref, "FLOAT1", 1.0),
        attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));
  }

  //
  // TESTING GENOTYPE FILTERS
  //
  // combinations of unfiltered samples and samples with one ("X") or two ("X", "Y") filters
  addGenotypeTests(
      site,
      new GenotypeBuilder("g1-x", Arrays.asList(ref, ref)).filters("X").make(),
      new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
  addGenotypeTests(
      site,
      new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
      new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
  addGenotypeTests(
      site,
      new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
      new GenotypeBuilder("g2-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
  addGenotypeTests(
      site,
      new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
      new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make(),
      new GenotypeBuilder("g3-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
}
/** * Read in a list of ExactCall objects from reader, keeping only those with starts in startsToKeep * or all sites (if this is empty) * * @param reader a just-opened reader sitting at the start of the file * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should * be kept * @param parser a genome loc parser to create genome locs * @return a list of ExactCall objects in reader * @throws IOException */ public static List<ExactCall> readExactLog( final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser) throws IOException { if (reader == null) throw new IllegalArgumentException("reader cannot be null"); if (startsToKeep == null) throw new IllegalArgumentException("startsToKeep cannot be null"); if (parser == null) throw new IllegalArgumentException("GenomeLocParser cannot be null"); List<ExactCall> calls = new LinkedList<ExactCall>(); // skip the header line reader.readLine(); // skip the first "type" line reader.readLine(); while (true) { final VariantContextBuilder builder = new VariantContextBuilder(); final List<Allele> alleles = new ArrayList<Allele>(); final List<Genotype> genotypes = new ArrayList<Genotype>(); final double[] posteriors = new double[2]; final double[] priors = MathUtils.normalizeFromLog10(new double[] {0.5, 0.5}, true); final List<Integer> mle = new ArrayList<Integer>(); final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(); long runtimeNano = -1; GenomeLoc currentLoc = null; while (true) { final String line = reader.readLine(); if (line == null) return calls; final String[] parts = line.split("\t"); final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]); final String variable = parts[1]; final String key = parts[2]; final String value = parts[3]; if (currentLoc == null) currentLoc = lineLoc; if (variable.equals("type")) { if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) { builder.alleles(alleles); final int stop = 
currentLoc.getStart() + alleles.get(0).length() - 1; builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); builder.genotypes(genotypes); final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[] {})); final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele); calls.add(new ExactCall(builder.make(), runtimeNano, result)); } break; } else if (variable.equals("allele")) { final boolean isRef = key.equals("0"); alleles.add(Allele.create(value, isRef)); } else if (variable.equals("PL")) { final GenotypeBuilder gb = new GenotypeBuilder(key); gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs()); genotypes.add(gb.make()); } else if (variable.equals("log10PosteriorOfAFEq0")) { posteriors[0] = Double.valueOf(value); } else if (variable.equals("log10PosteriorOfAFGt0")) { posteriors[1] = Double.valueOf(value); } else if (variable.equals("MLE")) { mle.add(Integer.valueOf(value)); } else if (variable.equals("pNonRefByAllele")) { final Allele a = Allele.create(key); log10pNonRefByAllele.put(a, Double.valueOf(value)); } else if (variable.equals("runtime.nano")) { runtimeNano = Long.valueOf(value); } else { // nothing to do } } } }
/**
 * Creates a two-allele genotype for the given sample.
 *
 * @param sample the sample name
 * @param a1 the first allele
 * @param a2 the second allele
 * @return a genotype for {@code sample} with alleles {@code a1} and {@code a2}, in that order
 */
private Genotype makeG(String sample, Allele a1, Allele a2) {
  final List<Allele> allelePair = Arrays.asList(a1, a2);
  return GenotypeBuilder.create(sample, allelePair);
}