/**
 * Registers test sites covering many combinations of site allele count and genotype ploidy.
 *
 * <p>For each pairing of 2 or 3 site alleles with a highest ploidy of 1, 2 or 3, the allele lists
 * returned by {@code makeAllGenotypes} (built from the site alleles plus the no-call allele) are
 * used three ways: one single-sample site per genotype, one site holding a sample for every
 * genotype, and single-sample sites whose genotype is cut down to each ploidy below the highest.
 *
 * <p>NOTE(review): one builder instance is handed to every {@code add} call with its genotypes
 * replaced each time; this presumably relies on {@code add} capturing the builder's state when
 * called -- confirm.
 */
public static void addComplexGenotypesTest() {
    // Pool that site alleles are drawn from; "A" is created as the reference allele.
    final List<Allele> allelePool =
        Arrays.asList(
            Allele.create("A", true), Allele.create("C", false), Allele.create("G", false));

    for (int nAlleles = 2; nAlleles <= 3; nAlleles++) {
      for (int highestPloidy = 1; highestPloidy <= 3; highestPloidy++) {
        // The site carries the first nAlleles entries of the pool.
        final List<Allele> siteAlleles = allelePool.subList(0, nAlleles);

        // A genotype may use any site allele or the no-call allele.
        final List<Allele> callableAlleles = new ArrayList<Allele>(siteAlleles);
        callableAlleles.add(Allele.NO_CALL);

        // Allele lists to turn into genotypes, as produced for this allele set and ploidy.
        final List<List<Allele>> genotypeChoices =
            makeAllGenotypes(callableAlleles, highestPloidy);
        final int nChoices = genotypeChoices.size();

        final VariantContextBuilder siteBuilder =
            new VariantContextBuilder("unittest", "1", 1, 1, siteAlleles);

        // Case 1: a separate single-sample site for each genotype.
        for (int i = 0; i < nChoices; i++) {
          final List<Genotype> single = new ArrayList<Genotype>(3);
          single.add(GenotypeBuilder.create("sample" + i, genotypeChoices.get(i)));
          add(siteBuilder.genotypes(single));
        }

        // Case 2: a single site with one sample per genotype.
        final List<Genotype> everyGenotype = new ArrayList<Genotype>(nChoices);
        int sampleIndex = 0;
        for (final List<Allele> genotypeAlleles : genotypeChoices) {
          everyGenotype.add(GenotypeBuilder.create("sample" + sampleIndex, genotypeAlleles));
          sampleIndex++;
        }
        add(siteBuilder.genotypes(everyGenotype));

        // Case 3: each genotype truncated to every ploidy strictly below the highest one.
        for (int i = 0; i < nChoices; i++) {
          final List<Allele> fullGenotype = genotypeChoices.get(i);
          for (int ploidy = 1; ploidy < highestPloidy; ploidy++) {
            final List<Genotype> truncated = new ArrayList<Genotype>(highestPloidy);
            truncated.add(GenotypeBuilder.create("sample" + i, fullGenotype.subList(0, ploidy)));
            add(siteBuilder.genotypes(truncated));
          }
        }
      }
    }
  }
 /**
  * Builds a genotype named {@code name} whose alleles are {@code ref, ref}, optionally carrying
  * one attribute.
  *
  * @param name the sample name of the genotype
  * @param ref the allele used for both genotype alleles
  * @param key the attribute key; unused when no value is supplied
  * @param value zero values leaves the attribute off, one value stores that object as-is, and
  *     several values are stored as a list
  * @return the constructed genotype
  */
 private static Genotype attr(
     final String name, final Allele ref, final String key, final Object... value) {
   // No value at all: plain ref/ref genotype without the attribute.
   if (value.length == 0) {
     return GenotypeBuilder.create(name, Arrays.asList(ref, ref));
   }

   final GenotypeBuilder gb = new GenotypeBuilder(name, Arrays.asList(ref, ref));
   if (value.length == 1) {
     // A lone value is stored directly rather than wrapped in a one-element list.
     gb.attribute(key, value[0]);
   } else {
     gb.attribute(key, Arrays.asList(value));
   }
   return gb.make();
 }
Exemplo n.º 3
0
  /**
   * Writes one VCF record describing the interval covered by {@code stats}.
   *
   * <p>The record spans the interval, lists {@code refAllele} followed by {@code SYMBOLIC_ALLELE},
   * carries interval-level filters and attributes (END, average depth, GC content), and holds one
   * genotype per entry of {@code samples} with that sample's coverage attributes and filters.
   *
   * @param stats the statistics of the interval to report
   * @param refAllele the reference allele, placed first in the record's allele list
   */
  private void outputStatsToVCF(final IntervalStratification stats, final Allele refAllele) {
    final GenomeLoc interval = stats.getInterval();

    // One genotype per sample: coverage metrics as attributes, callable statuses as filters.
    final List<Genotype> sampleGenotypes = new ArrayList<>();
    for (final String sample : samples) {
      final SampleStratification sampleStat = stats.getSampleStatistics(sample);
      final GenotypeBuilder gb =
          new GenotypeBuilder(sample)
              .attribute(
                  GATKVCFConstants.AVG_INTERVAL_DP_BY_SAMPLE_KEY,
                  sampleStat.averageCoverage(interval.size()))
              .attribute(GATKVCFConstants.LOW_COVERAGE_LOCI, sampleStat.getNLowCoveredLoci())
              .attribute(GATKVCFConstants.ZERO_COVERAGE_LOCI, sampleStat.getNUncoveredLoci());
      gb.filters(statusToStrings(stats.getSampleStatistics(sample).callableStatuses(), false));
      sampleGenotypes.add(gb.make());
    }

    // Reference allele first, then the symbolic allele.
    final List<Allele> siteAlleles = new ArrayList<>();
    siteAlleles.add(refAllele);
    siteAlleles.add(SYMBOLIC_ALLELE);

    VariantContextBuilder vcb =
        new VariantContextBuilder(
            "DiagnoseTargets",
            interval.getContig(),
            interval.getStart(),
            interval.getStop(),
            siteAlleles);
    vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR);
    vcb.filters(new LinkedHashSet<>(statusToStrings(stats.callableStatuses(), true)));

    // Interval-level attributes of the record.
    final Map<String, Object> infoFields = new HashMap<>();
    infoFields.put(VCFConstants.END_KEY, interval.getStop());
    infoFields.put(GATKVCFConstants.AVG_INTERVAL_DP_KEY, stats.averageCoverage(interval.size()));
    infoFields.put(GATKVCFConstants.INTERVAL_GC_CONTENT_KEY, stats.gcContent());

    vcb = vcb.attributes(infoFields).genotypes(sampleGenotypes);
    vcfWriter.add(vcb.make());
  }
  /**
   * Registers test sites with a single sample carrying PL values and an "ADA" attribute, for
   * every ploidy from 1 to 5 combined with allele sets of size 1 to 4.
   *
   * <p>The sample's genotype is the site's reference allele repeated {@code ploidy} times, its PL
   * array is 0, 1, ..., nPLs - 1, and "ADA" holds 0, 1, ..., nAlleles - 2 (a bare integer rather
   * than a list when the site has exactly two alleles).
   */
  private static void addGenotypesAndGTests() {
    // Allele sets under test; the same sets are used for every ploidy.
    final List<List<String>> alleleCombinations =
        Arrays.asList(
            Arrays.asList("A"),
            Arrays.asList("A", "C"),
            Arrays.asList("A", "C", "G"),
            Arrays.asList("A", "C", "G", "T"));

    for (int ploidy = 1; ploidy <= 5; ploidy++) {
      for (final List<String> alleles : alleleCombinations) {
        final VariantContextBuilder vcb = builder().alleles(alleles);
        // Built once without genotypes, only to read the allele count and reference allele.
        final VariantContext site = vcb.make();
        final int nAlleles = site.getNAlleles();
        final Allele ref = site.getReference();

        // Genotype alleles: the reference repeated ploidy times.
        final List<Allele> refOnlyGenotype = new ArrayList<Allele>(ploidy);
        for (int copy = 0; copy < ploidy; copy++) {
          refOnlyGenotype.add(ref);
        }

        // ADA values: 0, 1, ..., nAlleles - 2 (nAlleles - 1 entries in total).
        final List<Integer> ada = new ArrayList<Integer>(nAlleles);
        for (int altIndex = 0; altIndex < nAlleles - 1; altIndex++) {
          ada.add(altIndex);
        }

        // PL values: 0, 1, ..., nPLs - 1, where nPLs comes from the allele count and ploidy.
        final int nPLs = GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy);
        final int[] pl = new int[nPLs];
        for (int plIndex = 0; plIndex < nPLs; plIndex++) {
          pl[plIndex] = plIndex;
        }

        // With exactly two alleles the single ADA value is stored unwrapped.
        final Object adaValue;
        if (nAlleles == 2) {
          adaValue = ada.get(0);
        } else {
          adaValue = ada;
        }

        final GenotypeBuilder gb = new GenotypeBuilder("ADA_PL_SAMPLE");
        gb.alleles(refOnlyGenotype);
        gb.PL(pl);
        gb.attribute("ADA", adaValue);

        vcb.genotypes(gb.make());
        add(vcb);
      }
    }
  }
  /**
   * Registers genotype test cases for {@code site} through repeated calls to
   * {@code addGenotypeTests}, each call passing the site plus one or more genotypes.
   *
   * <p>Cases, in registration order: hom-ref alone; het / hom-var combinations and a genotype with
   * no alleles (only when the site has an alternate allele); mixed-ploidy combinations (guarded by
   * {@code ENABLE_PLOIDY_TESTS}); phased / unphased combinations; genotypes created with a
   * {@code double[]} (guarded by {@code ENABLE_PL_TESTS}, only for sites with 2 or 3 alleles);
   * integer, float and string attributes built with {@code attr}; and genotype filters.
   *
   * <p>The {@code ENABLE_*} flags, {@code TWENTY_INTS} and {@code addGenotypeTests} are defined
   * outside this method.
   *
   * @param site the site whose reference and first alternate allele are used to build genotypes
   */
  private static void addGenotypes(final VariantContext site) {
    // test ref/ref
    final Allele ref = site.getReference();
    // alt1 is null when the site has only one allele; all alt-dependent cases are skipped then
    final Allele alt1 = site.getNAlleles() > 1 ? site.getAlternateAllele(0) : null;
    final Genotype homRef = GenotypeBuilder.create("homRef", Arrays.asList(ref, ref));
    addGenotypeTests(site, homRef);

    if (alt1 != null) {
      final Genotype het = GenotypeBuilder.create("het", Arrays.asList(ref, alt1));
      final Genotype homVar = GenotypeBuilder.create("homVar", Arrays.asList(alt1, alt1));
      addGenotypeTests(site, homRef, het);
      addGenotypeTests(site, homRef, het, homVar);

      // test no GT at all: empty allele list, with one attribute set
      addGenotypeTests(
          site, new GenotypeBuilder("noGT", new ArrayList<Allele>(0)).attribute("INT1", 10).make());

      final List<Allele> noCall = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);

      // ploidy: mixes two-allele ("dip"), one-allele ("hap") and "tet" samples, with and without
      // no-call samples. Note that the "tet" samples list three alleles (ref, alt1, alt1).
      if (ENABLE_PLOIDY_TESTS) {
        addGenotypeTests(
            site,
            GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
            GenotypeBuilder.create("hap", Arrays.asList(ref)));

        addGenotypeTests(
            site,
            GenotypeBuilder.create("noCall", noCall),
            GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
            GenotypeBuilder.create("hap", Arrays.asList(ref)));

        addGenotypeTests(
            site,
            GenotypeBuilder.create("noCall", noCall),
            GenotypeBuilder.create("noCall2", noCall),
            GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
            GenotypeBuilder.create("hap", Arrays.asList(ref)));

        addGenotypeTests(
            site,
            GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
            GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));

        addGenotypeTests(
            site,
            GenotypeBuilder.create("noCall", noCall),
            GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
            GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));

        addGenotypeTests(
            site,
            GenotypeBuilder.create("noCall", noCall),
            GenotypeBuilder.create("noCall2", noCall),
            GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
            GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));

        // same genotypes as the noCall/dip/tet case above; only the first sample name differs
        // ("nocall" in lower case)
        addGenotypeTests(
            site,
            GenotypeBuilder.create("nocall", noCall),
            GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
            GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));
      }

      //
      //
      // TESTING PHASE
      //
      //
      // gUnphased and haploidNoPhase never call phased(true); the gPhased* genotypes do
      final Genotype gUnphased = new GenotypeBuilder("gUnphased", Arrays.asList(ref, alt1)).make();
      final Genotype gPhased =
          new GenotypeBuilder("gPhased", Arrays.asList(ref, alt1)).phased(true).make();
      final Genotype gPhased2 =
          new GenotypeBuilder("gPhased2", Arrays.asList(alt1, alt1)).phased(true).make();
      final Genotype gPhased3 =
          new GenotypeBuilder("gPhased3", Arrays.asList(ref, ref)).phased(true).make();
      final Genotype haploidNoPhase =
          new GenotypeBuilder("haploidNoPhase", Arrays.asList(ref)).make();
      addGenotypeTests(site, gUnphased, gPhased);
      addGenotypeTests(site, gUnphased, gPhased2);
      addGenotypeTests(site, gUnphased, gPhased3);
      addGenotypeTests(site, gPhased, gPhased2);
      addGenotypeTests(site, gPhased, gPhased3);
      addGenotypeTests(site, gPhased2, gPhased3);
      addGenotypeTests(site, haploidNoPhase, gPhased);
      addGenotypeTests(site, haploidNoPhase, gPhased2);
      addGenotypeTests(site, haploidNoPhase, gPhased3);
      addGenotypeTests(site, haploidNoPhase, gPhased, gPhased2);
      addGenotypeTests(site, haploidNoPhase, gPhased, gPhased3);
      addGenotypeTests(site, haploidNoPhase, gPhased2, gPhased3);
      addGenotypeTests(site, haploidNoPhase, gPhased, gPhased2, gPhased3);

      // four-allele genotypes, one unphased and one phased
      final Genotype gUnphasedTet =
          new GenotypeBuilder("gUnphasedTet", Arrays.asList(ref, alt1, ref, alt1)).make();
      final Genotype gPhasedTet =
          new GenotypeBuilder("gPhasedTet", Arrays.asList(ref, alt1, alt1, alt1))
              .phased(true)
              .make();
      addGenotypeTests(site, gUnphasedTet, gPhasedTet);
    }

    // The double[] arrays below have 3 entries for two-allele sites and 6 for three-allele sites;
    // sites with any other allele count get no cases from this section.
    if (ENABLE_PL_TESTS) {
      if (site.getNAlleles() == 2) {
        // testing PLs
        addGenotypeTests(
            site,
            GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {0, -1, -2}),
            GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[] {0, -2, -3}));

        addGenotypeTests(
            site,
            GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {-1, 0, -2}),
            GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[] {0, -2, -3}));

        addGenotypeTests(
            site,
            GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {-1, 0, -2}),
            GenotypeBuilder.create("g2", Arrays.asList(ref, ref), new double[] {0, -2000, -1000}));

        addGenotypeTests(
            site, // missing PLs
            GenotypeBuilder.create("g1", Arrays.asList(ref, ref), new double[] {-1, 0, -2}),
            GenotypeBuilder.create("g2", Arrays.asList(ref, ref)));
      } else if (site.getNAlleles() == 3) {
        // testing PLs
        addGenotypeTests(
            site,
            GenotypeBuilder.create(
                "g1", Arrays.asList(ref, ref), new double[] {0, -1, -2, -3, -4, -5}),
            GenotypeBuilder.create(
                "g2", Arrays.asList(ref, ref), new double[] {0, -2, -3, -4, -5, -6}));
      }
    }

    // test attributes
    // attr(...) with no values after the key builds a ref/ref genotype without that attribute,
    // so those cases pair a sample that has the attribute with one that lacks it.
    addGenotypeTests(site, attr("g1", ref, "INT1", 1), attr("g2", ref, "INT1", 2));
    addGenotypeTests(site, attr("g1", ref, "INT1", 1), attr("g2", ref, "INT1"));
    addGenotypeTests(site, attr("g1", ref, "INT3", 1, 2, 3), attr("g2", ref, "INT3", 4, 5, 6));
    addGenotypeTests(site, attr("g1", ref, "INT3", 1, 2, 3), attr("g2", ref, "INT3"));

    addGenotypeTests(
        site, attr("g1", ref, "INT20", TWENTY_INTS), attr("g2", ref, "INT20", TWENTY_INTS));

    // samples carrying different numbers of values under the same key
    if (ENABLE_VARARRAY_TESTS) {
      addGenotypeTests(
          site,
          attr("g1", ref, "INT.VAR", 1, 2, 3),
          attr("g2", ref, "INT.VAR", 4, 5),
          attr("g3", ref, "INT.VAR", 6));
      addGenotypeTests(
          site,
          attr("g1", ref, "INT.VAR", 1, 2, 3),
          attr("g2", ref, "INT.VAR"),
          attr("g3", ref, "INT.VAR", 5));
    }

    addGenotypeTests(site, attr("g1", ref, "FLOAT1", 1.0), attr("g2", ref, "FLOAT1", 2.0));
    addGenotypeTests(site, attr("g1", ref, "FLOAT1", 1.0), attr("g2", ref, "FLOAT1"));
    addGenotypeTests(
        site, attr("g1", ref, "FLOAT3", 1.0, 2.0, 3.0), attr("g2", ref, "FLOAT3", 4.0, 5.0, 6.0));
    addGenotypeTests(site, attr("g1", ref, "FLOAT3", 1.0, 2.0, 3.0), attr("g2", ref, "FLOAT3"));

    if (ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS) {
      //
      //
      // TESTING MULTIPLE SIZED LISTS IN THE GENOTYPE FIELD
      //
      //
      // each List below is passed to attr as a single value, so it is stored as-is
      addGenotypeTests(
          site,
          attr("g1", ref, "GS", Arrays.asList("S1", "S2")),
          attr("g2", ref, "GS", Arrays.asList("S3", "S4")));

      addGenotypeTests(
          site, // g1 is missing the string, and g2 is missing FLOAT1
          attr("g1", ref, "FLOAT1", 1.0),
          attr("g2", ref, "GS", Arrays.asList("S3", "S4")));

      // variable sized lists
      addGenotypeTests(
          site, attr("g1", ref, "GV", "S1"), attr("g2", ref, "GV", Arrays.asList("S3", "S4")));

      addGenotypeTests(
          site,
          attr("g1", ref, "GV", Arrays.asList("S1", "S2")),
          attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));

      addGenotypeTests(
          site, // missing value in varlist of string
          attr("g1", ref, "FLOAT1", 1.0),
          attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));
    }

    //
    //
    // TESTING GENOTYPE FILTERS
    //
    //
    // combinations of samples with one filter, two filters, or explicitly marked unfiltered
    addGenotypeTests(
        site,
        new GenotypeBuilder("g1-x", Arrays.asList(ref, ref)).filters("X").make(),
        new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
    addGenotypeTests(
        site,
        new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
        new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
    addGenotypeTests(
        site,
        new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
        new GenotypeBuilder("g2-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
    addGenotypeTests(
        site,
        new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
        new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make(),
        new GenotypeBuilder("g3-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
  }
Exemplo n.º 6
0
  /**
   * Parses an exact-model call log into a list of ExactCall objects.
   *
   * <p>The first two lines of the input (the header and the first "type" line) are discarded.
   * Each following line is split on tabs into location, variable, key and value, and accumulated
   * into the current record until a line whose variable is "type" is read, which closes the
   * record. A closed record is added to the result only when {@code startsToKeep} is empty or
   * contains the start of the record's first line.
   *
   * <p>NOTE: reaching the end of the input returns immediately, so a record that has not yet been
   * closed by a following "type" line is not included in the result.
   *
   * @param reader a just-opened reader sitting at the start of the file
   * @param startsToKeep start positions of the calls to keep, or empty if all calls should be kept
   * @param parser a genome loc parser used to turn the first field of each line into a location
   * @return the calls read from {@code reader}, in the order they were closed
   * @throws IOException if reading from {@code reader} fails
   * @throws IllegalArgumentException if any argument is null
   */
  public static List<ExactCall> readExactLog(
      final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser)
      throws IOException {
    if (reader == null) {
      throw new IllegalArgumentException("reader cannot be null");
    }
    if (startsToKeep == null) {
      throw new IllegalArgumentException("startsToKeep cannot be null");
    }
    if (parser == null) {
      throw new IllegalArgumentException("GenomeLocParser cannot be null");
    }

    final List<ExactCall> calls = new LinkedList<ExactCall>();

    reader.readLine(); // header line
    reader.readLine(); // first "type" line

    for (;;) {
      // Fresh accumulation state for the next record.
      final VariantContextBuilder builder = new VariantContextBuilder();
      final List<Allele> alleles = new ArrayList<Allele>();
      final List<Genotype> genotypes = new ArrayList<Genotype>();
      final double[] posteriors = new double[2];
      final double[] priors = MathUtils.normalizeFromLog10(new double[] {0.5, 0.5}, true);
      final List<Integer> mle = new ArrayList<Integer>();
      final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
      long runtimeNano = -1;

      GenomeLoc recordLoc = null;
      boolean recordOpen = true;
      while (recordOpen) {
        final String line = reader.readLine();
        if (line == null) {
          return calls;
        }

        final String[] fields = line.split("\t");
        final GenomeLoc lineLoc = parser.parseGenomeLoc(fields[0]);
        final String variable = fields[1];
        final String key = fields[2];
        final String value = fields[3];

        // The record is positioned at the location of its first line.
        if (recordLoc == null) {
          recordLoc = lineLoc;
        }

        if ("type".equals(variable)) {
          // A "type" line closes the record accumulated so far.
          if (startsToKeep.isEmpty() || startsToKeep.contains(recordLoc.getStart())) {
            final int start = recordLoc.getStart();
            // The stop is derived from the length of the first allele read.
            final int stop = start + alleles.get(0).length() - 1;
            builder.alleles(alleles);
            builder.chr(recordLoc.getContig()).start(start).stop(stop);
            builder.genotypes(genotypes);

            final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[] {}));
            final AFCalcResult result =
                new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
            calls.add(new ExactCall(builder.make(), runtimeNano, result));
          }
          recordOpen = false;
        } else if ("allele".equals(variable)) {
          // The allele whose key is "0" is created as the reference allele.
          alleles.add(Allele.create(value, key.equals("0")));
        } else if ("PL".equals(variable)) {
          // The key names the sample; the value is a PL field string.
          final GenotypeBuilder gb = new GenotypeBuilder(key);
          gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
          genotypes.add(gb.make());
        } else if ("log10PosteriorOfAFEq0".equals(variable)) {
          posteriors[0] = Double.parseDouble(value);
        } else if ("log10PosteriorOfAFGt0".equals(variable)) {
          posteriors[1] = Double.parseDouble(value);
        } else if ("MLE".equals(variable)) {
          mle.add(Integer.valueOf(value));
        } else if ("pNonRefByAllele".equals(variable)) {
          log10pNonRefByAllele.put(Allele.create(key), Double.valueOf(value));
        } else if ("runtime.nano".equals(variable)) {
          runtimeNano = Long.parseLong(value);
        }
        // Lines with any other variable are ignored.
      }
    }
  }
Exemplo n.º 7
0
 /**
  * Creates a genotype for {@code sample} from the two given alleles, in the order supplied.
  *
  * @param sample the sample name
  * @param a1 the first allele
  * @param a2 the second allele
  * @return the genotype built from {@code a1} and {@code a2}
  */
 private Genotype makeG(String sample, Allele a1, Allele a2) {
   final List<Allele> allelePair = Arrays.asList(a1, a2);
   return GenotypeBuilder.create(sample, allelePair);
 }