Ejemplo n.º 1
0
  // necessary to not overload equals for genotypes
  private void assertGenotypesAreMostlyEqual(GenotypesContext actual, GenotypesContext expected) {
    if (actual == expected) {
      return;
    }

    if (actual == null || expected == null) {
      Assert.fail("Maps not equal: expected: " + expected + " and actual: " + actual);
    }

    if (actual.size() != expected.size()) {
      Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size());
    }

    for (Genotype value : actual) {
      Genotype expectedValue = expected.get(value.getSampleName());

      Assert.assertEquals(
          value.getAlleles(), expectedValue.getAlleles(), "Alleles in Genotype aren't equal");
      Assert.assertEquals(value.getGQ(), expectedValue.getGQ(), "GQ values aren't equal");
      Assert.assertEquals(
          value.hasLikelihoods(),
          expectedValue.hasLikelihoods(),
          "Either both have likelihoods or both not");
      if (value.hasLikelihoods())
        Assert.assertEquals(
            value.getLikelihoods().getAsVector(),
            expectedValue.getLikelihoods().getAsVector(),
            "Genotype likelihoods aren't equal");
    }
  }
Ejemplo n.º 2
0
  protected final void printCallInfo(
      final VariantContext vc,
      final double[] log10AlleleFrequencyPriors,
      final long runtimeNano,
      final AFCalcResult result) {
    printCallElement(vc, "type", "ignore", vc.getType());

    int allelei = 0;
    for (final Allele a : vc.getAlleles())
      printCallElement(vc, "allele", allelei++, a.getDisplayString());

    for (final Genotype g : vc.getGenotypes())
      printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString());

    for (int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++)
      printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]);

    printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
    printCallElement(vc, "log10PosteriorOfAFEq0", "ignore", result.getLog10PosteriorOfAFEq0());
    printCallElement(vc, "log10PosteriorOfAFGt0", "ignore", result.getLog10PosteriorOfAFGT0());

    for (final Allele allele : result.getAllelesUsedInGenotyping()) {
      if (allele.isNonReference()) {
        printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele));
        printCallElement(
            vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele));
      }
    }

    callReport.flush();
  }
  private static void mergeGenotypes(
      GenotypesContext mergedGenotypes,
      VariantContext oneVC,
      AlleleMapper alleleMapping,
      boolean uniqifySamples) {
    for (Genotype g : oneVC.getGenotypes()) {
      String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples);
      if (!mergedGenotypes.containsSample(name)) {
        // only add if the name is new
        Genotype newG = g;

        if (uniqifySamples || alleleMapping.needsRemapping()) {
          final List<Allele> alleles =
              alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles();
          newG =
              new Genotype(
                  name,
                  alleles,
                  g.getLog10PError(),
                  g.getFilters(),
                  g.getAttributes(),
                  g.isPhased());
        }

        mergedGenotypes.add(newG);
      }
    }
  }
  public static void testReaderWriterWithMissingGenotypes(
      final VariantContextIOTest tester, final VariantContextTestData data) throws IOException {
    final int nSamples = data.header.getNGenotypeSamples();
    if (nSamples > 2) {
      for (final VariantContext vc : data.vcs)
        if (vc.isSymbolic())
          // cannot handle symbolic alleles because they may be weird non-call VCFs
          return;

      final File tmpFile = File.createTempFile("testReaderWriter", tester.getExtension());
      tmpFile.deleteOnExit();

      // write expected to disk
      final EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
      final VariantContextWriter writer = tester.makeWriter(tmpFile, options);

      final Set<String> samplesInVCF = new HashSet<String>(data.header.getGenotypeSamples());
      final List<String> missingSamples = Arrays.asList("MISSING1", "MISSING2");
      final List<String> allSamples = new ArrayList<String>(missingSamples);
      allSamples.addAll(samplesInVCF);

      final VCFHeader header = new VCFHeader(data.header.getMetaDataInInputOrder(), allSamples);
      writeVCsToFile(writer, header, data.vcs);

      // ensure writing of expected == actual
      final VariantContextContainer p = tester.readAllVCs(tmpFile);
      final Iterable<VariantContext> actual = p.getVCs();

      int i = 0;
      for (final VariantContext readVC : actual) {
        if (readVC == null) continue; // sometimes we read null records...
        final VariantContext expected = data.vcs.get(i++);
        for (final Genotype g : readVC.getGenotypes()) {
          Assert.assertTrue(allSamples.contains(g.getSampleName()));
          if (samplesInVCF.contains(g.getSampleName())) {
            assertEquals(g, expected.getGenotype(g.getSampleName()));
          } else {
            // missing
            Assert.assertTrue(g.isNoCall());
          }
        }
      }
    }
  }
Ejemplo n.º 5
0
  static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) {
    // Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1:
    for (final Genotype gt1 : vc1.getGenotypes()) {
      Genotype gt2 = vc2.getGenotype(gt1.getSampleName());

      if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom
      return false;
    }

    return true;
  }
 public static Genotype removePLs(Genotype g) {
   Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
   attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
   attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
   return new Genotype(
       g.getSampleName(),
       g.getAlleles(),
       g.getLog10PError(),
       g.filtersWereApplied() ? g.getFilters() : null,
       attrs,
       g.isPhased());
 }
  public static void assertEquals(final Genotype actual, final Genotype expected) {
    Assert.assertEquals(actual.getSampleName(), expected.getSampleName(), "Genotype names");
    Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "Genotype alleles");
    Assert.assertEquals(
        actual.getGenotypeString(), expected.getGenotypeString(), "Genotype string");
    Assert.assertEquals(actual.getType(), expected.getType(), "Genotype type");

    // filters are the same
    Assert.assertEquals(actual.getFilters(), expected.getFilters(), "Genotype fields");
    Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "Genotype isFiltered");

    // inline attributes
    Assert.assertEquals(actual.getDP(), expected.getDP(), "Genotype dp");
    Assert.assertTrue(Arrays.equals(actual.getAD(), expected.getAD()));
    Assert.assertEquals(actual.getGQ(), expected.getGQ(), "Genotype gq");
    Assert.assertEquals(actual.hasPL(), expected.hasPL(), "Genotype hasPL");
    Assert.assertEquals(actual.hasAD(), expected.hasAD(), "Genotype hasAD");
    Assert.assertEquals(actual.hasGQ(), expected.hasGQ(), "Genotype hasGQ");
    Assert.assertEquals(actual.hasDP(), expected.hasDP(), "Genotype hasDP");

    Assert.assertEquals(
        actual.hasLikelihoods(), expected.hasLikelihoods(), "Genotype haslikelihoods");
    Assert.assertEquals(
        actual.getLikelihoodsString(),
        expected.getLikelihoodsString(),
        "Genotype getlikelihoodsString");
    Assert.assertEquals(
        actual.getLikelihoods(), expected.getLikelihoods(), "Genotype getLikelihoods");
    Assert.assertTrue(Arrays.equals(actual.getPL(), expected.getPL()));

    Assert.assertEquals(
        actual.getPhredScaledQual(), expected.getPhredScaledQual(), "Genotype phredScaledQual");
    assertAttributesEquals(actual.getExtendedAttributes(), expected.getExtendedAttributes());
    Assert.assertEquals(actual.isPhased(), expected.isPhased(), "Genotype isPhased");
    Assert.assertEquals(actual.getPloidy(), expected.getPloidy(), "Genotype getPloidy");
  }
Ejemplo n.º 8
0
  static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
    for (final Genotype gt1 : vc1.getGenotypes()) {
      Genotype gt2 = vc2.getGenotype(gt1.getSampleName());

      List<Allele> site1Alleles = gt1.getAlleles();
      List<Allele> site2Alleles = gt2.getAlleles();

      Iterator<Allele> all2It = site2Alleles.iterator();
      for (Allele all1 : site1Alleles) {
        Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()

        if (all1.isNonReference() && all2.isNonReference()) // corresponding alleles are alternate
        return true;
      }
    }

    return false;
  }
Ejemplo n.º 9
0
  static boolean doubleAllelesSegregatePerfectlyAmongSamples(
      VariantContext vc1, VariantContext vc2) {
    // Check that Alleles at vc1 and at vc2 always segregate together in all samples (including
    // reference):
    Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>();
    Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>();

    // Note the segregation of the alleles for the reference genome:
    allele1ToAllele2.put(vc1.getReference(), vc2.getReference());
    allele2ToAllele1.put(vc2.getReference(), vc1.getReference());

    // Note the segregation of the alleles for each sample (and check that it is consistent with the
    // reference and all previous samples).
    for (final Genotype gt1 : vc1.getGenotypes()) {
      Genotype gt2 = vc2.getGenotype(gt1.getSampleName());

      List<Allele> site1Alleles = gt1.getAlleles();
      List<Allele> site2Alleles = gt2.getAlleles();

      Iterator<Allele> all2It = site2Alleles.iterator();
      for (Allele all1 : site1Alleles) {
        Allele all2 = all2It.next();

        Allele all1To2 = allele1ToAllele2.get(all1);
        if (all1To2 == null) allele1ToAllele2.put(all1, all2);
        else if (!all1To2.equals(all2)) // all1 segregates with two different alleles at site 2
        return false;

        Allele all2To1 = allele2ToAllele1.get(all2);
        if (all2To1 == null) allele2ToAllele1.put(all2, all1);
        else if (!all2To1.equals(all1)) // all2 segregates with two different alleles at site 1
        return false;
      }
    }

    return true;
  }
  public static VariantContextBuilder pruneVariantContext(
      final VariantContextBuilder builder, Collection<String> keysToPreserve) {
    final VariantContext vc = builder.make();
    if (keysToPreserve == null) keysToPreserve = Collections.emptyList();

    // VC info
    final Map<String, Object> attributes = subsetAttributes(vc.commonInfo, keysToPreserve);

    // Genotypes
    final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
    for (final Genotype g : vc.getGenotypes()) {
      Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve);
      genotypes.add(
          new Genotype(
              g.getSampleName(),
              g.getAlleles(),
              g.getLog10PError(),
              g.getFilters(),
              genotypeAttributes,
              g.isPhased()));
    }

    return builder.genotypes(genotypes).attributes(attributes);
  }
  public static VariantContext createVariantContextWithPaddedAlleles(
      VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
    // see if we need to pad common reference base from all alleles
    boolean padVC;

    // We need to pad a VC with a common base if the length of the reference allele is less than the
    // length of the VariantContext.
    // This happens because the position of e.g. an indel is always one before the actual event (as
    // per VCF convention).
    long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1;
    if (inputVC.hasSymbolicAlleles()) padVC = true;
    else if (inputVC.getReference().length() == locLength) padVC = false;
    else if (inputVC.getReference().length() == locLength - 1) padVC = true;
    else
      throw new IllegalArgumentException(
          "Badly formed variant context at location "
              + String.valueOf(inputVC.getStart())
              + " in contig "
              + inputVC.getChr()
              + ". Reference length must be at most one base shorter than location size");

    // nothing to do if we don't need to pad bases
    if (padVC) {
      if (!inputVC.hasReferenceBaseForIndel())
        throw new ReviewedStingException(
            "Badly formed variant context at location "
                + inputVC.getChr()
                + ":"
                + inputVC.getStart()
                + "; no padded reference base is available.");

      Byte refByte = inputVC.getReferenceBaseForIndel();

      List<Allele> alleles = new ArrayList<Allele>();

      for (Allele a : inputVC.getAlleles()) {
        // get bases for current allele and create a new one with trimmed bases
        if (a.isSymbolic()) {
          alleles.add(a);
        } else {
          String newBases;
          if (refBaseShouldBeAppliedToEndOfAlleles)
            newBases = a.getBaseString() + new String(new byte[] {refByte});
          else newBases = new String(new byte[] {refByte}) + a.getBaseString();
          alleles.add(Allele.create(newBases, a.isReference()));
        }
      }

      // now we can recreate new genotypes with trimmed alleles
      GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
      for (final Genotype g : inputVC.getGenotypes()) {
        List<Allele> inAlleles = g.getAlleles();
        List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
        for (Allele a : inAlleles) {
          if (a.isCalled()) {
            if (a.isSymbolic()) {
              newGenotypeAlleles.add(a);
            } else {
              String newBases;
              if (refBaseShouldBeAppliedToEndOfAlleles)
                newBases = a.getBaseString() + new String(new byte[] {refByte});
              else newBases = new String(new byte[] {refByte}) + a.getBaseString();
              newGenotypeAlleles.add(Allele.create(newBases, a.isReference()));
            }
          } else {
            // add no-call allele
            newGenotypeAlleles.add(Allele.NO_CALL);
          }
        }
        genotypes.add(
            new Genotype(
                g.getSampleName(),
                newGenotypeAlleles,
                g.getLog10PError(),
                g.getFilters(),
                g.getAttributes(),
                g.isPhased()));
      }

      return new VariantContextBuilder(inputVC).alleles(alleles).genotypes(genotypes).make();
    } else return inputVC;
  }
 public static final Set<String> genotypeNames(final Collection<Genotype> genotypes) {
   final Set<String> names = new HashSet<String>(genotypes.size());
   for (final Genotype g : genotypes) names.add(g.getSampleName());
   return names;
 }
Ejemplo n.º 13
0
  static VariantContext reallyMergeIntoMNP(
      VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
    int startInter = vc1.getEnd() + 1;
    int endInter = vc2.getStart() - 1;
    byte[] intermediateBases = null;
    if (startInter <= endInter) {
      intermediateBases =
          referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases();
      StringUtil.toUpperCase(intermediateBases);
    }
    MergedAllelesData mergeData =
        new MergedAllelesData(
            intermediateBases, vc1, vc2); // ensures that the reference allele is added

    GenotypesContext mergedGenotypes = GenotypesContext.create();
    for (final Genotype gt1 : vc1.getGenotypes()) {
      Genotype gt2 = vc2.getGenotype(gt1.getSampleName());

      List<Allele> site1Alleles = gt1.getAlleles();
      List<Allele> site2Alleles = gt2.getAlleles();

      List<Allele> mergedAllelesForSample = new LinkedList<Allele>();

      /* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order as in the input VC records,
        we preserve phase information (if any) relative to whatever precedes vc1:
      */
      Iterator<Allele> all2It = site2Alleles.iterator();
      for (Allele all1 : site1Alleles) {
        Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()

        Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2);
        mergedAllelesForSample.add(mergedAllele);
      }

      double mergedGQ = Math.max(gt1.getLog10PError(), gt2.getLog10PError());
      Set<String> mergedGtFilters =
          new HashSet<
              String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered

      Map<String, Object> mergedGtAttribs = new HashMap<String, Object>();
      PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2);
      if (phaseQual.PQ != null) mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);

      Genotype mergedGt =
          new Genotype(
              gt1.getSampleName(),
              mergedAllelesForSample,
              mergedGQ,
              mergedGtFilters,
              mergedGtAttribs,
              phaseQual.isPhased);
      mergedGenotypes.add(mergedGt);
    }

    String mergedName = mergeVariantContextNames(vc1.getSource(), vc2.getSource());
    double mergedLog10PError = Math.min(vc1.getLog10PError(), vc2.getLog10PError());
    Set<String> mergedFilters =
        new HashSet<
            String>(); // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered
    Map<String, Object> mergedAttribs = mergeVariantContextAttributes(vc1, vc2);

    // ids
    List<String> mergedIDs = new ArrayList<String>();
    if (vc1.hasID()) mergedIDs.add(vc1.getID());
    if (vc2.hasID()) mergedIDs.add(vc2.getID());
    String mergedID =
        mergedIDs.isEmpty()
            ? VCFConstants.EMPTY_ID_FIELD
            : Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs);

    VariantContextBuilder mergedBuilder =
        new VariantContextBuilder(
                mergedName,
                vc1.getChr(),
                vc1.getStart(),
                vc2.getEnd(),
                mergeData.getAllMergedAlleles())
            .id(mergedID)
            .genotypes(mergedGenotypes)
            .log10PError(mergedLog10PError)
            .filters(mergedFilters)
            .attributes(mergedAttribs);
    VariantContextUtils.calculateChromosomeCounts(mergedBuilder, true);
    return mergedBuilder.make();
  }