/**
   * Returns a context identical to this with the REF and ALT alleles reverse complemented.
   *
   * @param vc variant context
   * @return new vc
   */
  public static VariantContext reverseComplement(VariantContext vc) {
    // create a mapping from original allele to reverse complemented allele
    HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
    for (Allele originalAllele : vc.getAlleles()) {
      Allele newAllele;
      if (originalAllele.isNoCall() || originalAllele.isNull()) newAllele = originalAllele;
      else
        newAllele =
            Allele.create(
                BaseUtils.simpleReverseComplement(originalAllele.getBases()),
                originalAllele.isReference());
      alleleMap.put(originalAllele, newAllele);
    }

    // create new Genotype objects
    GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
    for (final Genotype genotype : vc.getGenotypes()) {
      List<Allele> newAlleles = new ArrayList<Allele>();
      for (Allele allele : genotype.getAlleles()) {
        Allele newAllele = alleleMap.get(allele);
        if (newAllele == null) newAllele = Allele.NO_CALL;
        newAlleles.add(newAllele);
      }
      newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
    }

    return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
  }
  private static void mergeGenotypes(
      GenotypesContext mergedGenotypes,
      VariantContext oneVC,
      AlleleMapper alleleMapping,
      boolean uniqifySamples) {
    for (Genotype g : oneVC.getGenotypes()) {
      String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples);
      if (!mergedGenotypes.containsSample(name)) {
        // only add if the name is new
        Genotype newG = g;

        if (uniqifySamples || alleleMapping.needsRemapping()) {
          final List<Allele> alleles =
              alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles();
          newG =
              new Genotype(
                  name,
                  alleles,
                  g.getLog10PError(),
                  g.getFilters(),
                  g.getAttributes(),
                  g.isPhased());
        }

        mergedGenotypes.add(newG);
      }
    }
  }
  public static GenotypesContext stripPLs(GenotypesContext genotypes) {
    GenotypesContext newGs = GenotypesContext.create(genotypes.size());

    for (final Genotype g : genotypes) {
      newGs.add(g.hasLikelihoods() ? removePLs(g) : g);
    }

    return newGs;
  }
예제 #4
0
  @Test
  public void testFixReverseComplementedGenotypes() {

    final Allele refA = Allele.create("A", true);
    final Allele altC = Allele.create("C", false);
    final GenotypesContext originalGenotypes = GenotypesContext.create(3);
    originalGenotypes.add(new GenotypeBuilder("homref").alleles(Arrays.asList(refA, refA)).make());
    originalGenotypes.add(new GenotypeBuilder("het").alleles(Arrays.asList(refA, altC)).make());
    originalGenotypes.add(new GenotypeBuilder("homvar").alleles(Arrays.asList(altC, altC)).make());

    final Allele refT = Allele.create("T", true);
    final Allele altG = Allele.create("G", false);
    final GenotypesContext expectedGenotypes = GenotypesContext.create(3);
    expectedGenotypes.add(new GenotypeBuilder("homref").alleles(Arrays.asList(refT, refT)).make());
    expectedGenotypes.add(new GenotypeBuilder("het").alleles(Arrays.asList(refT, altG)).make());
    expectedGenotypes.add(new GenotypeBuilder("homvar").alleles(Arrays.asList(altG, altG)).make());

    final Map<Allele, Allele> reverseComplementAlleleMap = new HashMap<Allele, Allele>(2);
    reverseComplementAlleleMap.put(refA, refT);
    reverseComplementAlleleMap.put(altC, altG);
    final GenotypesContext actualGenotypes =
        LiftoverVcf.fixGenotypes(originalGenotypes, reverseComplementAlleleMap);

    for (final String sample : Arrays.asList("homref", "het", "homvar")) {
      final List<Allele> expected = expectedGenotypes.get(sample).getAlleles();
      final List<Allele> actual = actualGenotypes.get(sample).getAlleles();
      Assert.assertEquals(expected.get(0), actual.get(0));
      Assert.assertEquals(expected.get(1), actual.get(1));
    }
  }
  public static VariantContext purgeUnallowedGenotypeAttributes(
      VariantContext vc, Set<String> allowedAttributes) {
    if (allowedAttributes == null) return vc;

    GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
    for (final Genotype genotype : vc.getGenotypes()) {
      Map<String, Object> attrs = new HashMap<String, Object>();
      for (Map.Entry<String, Object> attr : genotype.getAttributes().entrySet()) {
        if (allowedAttributes.contains(attr.getKey())) attrs.put(attr.getKey(), attr.getValue());
      }
      newGenotypes.add(Genotype.modifyAttributes(genotype, attrs));
    }

    return new VariantContextBuilder(vc).genotypes(newGenotypes).make();
  }
  public static VariantContextBuilder pruneVariantContext(
      final VariantContextBuilder builder, Collection<String> keysToPreserve) {
    final VariantContext vc = builder.make();
    if (keysToPreserve == null) keysToPreserve = Collections.emptyList();

    // VC info
    final Map<String, Object> attributes = subsetAttributes(vc.commonInfo, keysToPreserve);

    // Genotypes
    final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
    for (final Genotype g : vc.getGenotypes()) {
      Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve);
      genotypes.add(
          new Genotype(
              g.getSampleName(),
              g.getAlleles(),
              g.getLog10PError(),
              g.getFilters(),
              genotypeAttributes,
              g.isPhased()));
    }

    return builder.genotypes(genotypes).attributes(attributes);
  }
  public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
    // see if we need to trim common reference base from all alleles
    boolean trimVC;

    // We need to trim common reference base from all alleles in all genotypes if a ref base is
    // common to all alleles
    Allele refAllele = inputVC.getReference();
    if (!inputVC.isVariant()) trimVC = false;
    else if (refAllele.isNull()) trimVC = false;
    else {
      trimVC =
          (AbstractVCFCodec.computeForwardClipping(
                  new ArrayList<Allele>(inputVC.getAlternateAlleles()),
                  inputVC.getReference().getDisplayString())
              > 0);
    }

    // nothing to do if we don't need to trim bases
    if (trimVC) {
      List<Allele> alleles = new ArrayList<Allele>();
      GenotypesContext genotypes = GenotypesContext.create();

      // set the reference base for indels in the attributes
      Map<String, Object> attributes = new TreeMap<String, Object>(inputVC.getAttributes());

      Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();

      for (Allele a : inputVC.getAlleles()) {
        if (a.isSymbolic()) {
          alleles.add(a);
          originalToTrimmedAlleleMap.put(a, a);
        } else {
          // get bases for current allele and create a new one with trimmed bases
          byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
          Allele trimmedAllele = Allele.create(newBases, a.isReference());
          alleles.add(trimmedAllele);
          originalToTrimmedAlleleMap.put(a, trimmedAllele);
        }
      }

      // detect case where we're trimming bases but resulting vc doesn't have any null allele. In
      // that case, we keep original representation
      // example: mixed records such as {TA*,TGA,TG}
      boolean hasNullAlleles = false;

      for (Allele a : originalToTrimmedAlleleMap.values()) {
        if (a.isNull()) hasNullAlleles = true;
        if (a.isReference()) refAllele = a;
      }

      if (!hasNullAlleles) return inputVC;
      // now we can recreate new genotypes with trimmed alleles
      for (final Genotype genotype : inputVC.getGenotypes()) {

        List<Allele> originalAlleles = genotype.getAlleles();
        List<Allele> trimmedAlleles = new ArrayList<Allele>();
        for (Allele a : originalAlleles) {
          if (a.isCalled()) trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
          else trimmedAlleles.add(Allele.NO_CALL);
        }
        genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
      }

      final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
      return builder
          .alleles(alleles)
          .genotypes(genotypes)
          .attributes(attributes)
          .referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0]))
          .make();
    }

    return inputVC;
  }
  public static VariantContext createVariantContextWithPaddedAlleles(
      VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
    // see if we need to pad common reference base from all alleles
    boolean padVC;

    // We need to pad a VC with a common base if the length of the reference allele is less than the
    // length of the VariantContext.
    // This happens because the position of e.g. an indel is always one before the actual event (as
    // per VCF convention).
    long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1;
    if (inputVC.hasSymbolicAlleles()) padVC = true;
    else if (inputVC.getReference().length() == locLength) padVC = false;
    else if (inputVC.getReference().length() == locLength - 1) padVC = true;
    else
      throw new IllegalArgumentException(
          "Badly formed variant context at location "
              + String.valueOf(inputVC.getStart())
              + " in contig "
              + inputVC.getChr()
              + ". Reference length must be at most one base shorter than location size");

    // nothing to do if we don't need to pad bases
    if (padVC) {
      if (!inputVC.hasReferenceBaseForIndel())
        throw new ReviewedStingException(
            "Badly formed variant context at location "
                + inputVC.getChr()
                + ":"
                + inputVC.getStart()
                + "; no padded reference base is available.");

      Byte refByte = inputVC.getReferenceBaseForIndel();

      List<Allele> alleles = new ArrayList<Allele>();

      for (Allele a : inputVC.getAlleles()) {
        // get bases for current allele and create a new one with trimmed bases
        if (a.isSymbolic()) {
          alleles.add(a);
        } else {
          String newBases;
          if (refBaseShouldBeAppliedToEndOfAlleles)
            newBases = a.getBaseString() + new String(new byte[] {refByte});
          else newBases = new String(new byte[] {refByte}) + a.getBaseString();
          alleles.add(Allele.create(newBases, a.isReference()));
        }
      }

      // now we can recreate new genotypes with trimmed alleles
      GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
      for (final Genotype g : inputVC.getGenotypes()) {
        List<Allele> inAlleles = g.getAlleles();
        List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
        for (Allele a : inAlleles) {
          if (a.isCalled()) {
            if (a.isSymbolic()) {
              newGenotypeAlleles.add(a);
            } else {
              String newBases;
              if (refBaseShouldBeAppliedToEndOfAlleles)
                newBases = a.getBaseString() + new String(new byte[] {refByte});
              else newBases = new String(new byte[] {refByte}) + a.getBaseString();
              newGenotypeAlleles.add(Allele.create(newBases, a.isReference()));
            }
          } else {
            // add no-call allele
            newGenotypeAlleles.add(Allele.NO_CALL);
          }
        }
        genotypes.add(
            new Genotype(
                g.getSampleName(),
                newGenotypeAlleles,
                g.getLog10PError(),
                g.getFilters(),
                g.getAttributes(),
                g.isPhased()));
      }

      return new VariantContextBuilder(inputVC).alleles(alleles).genotypes(genotypes).make();
    } else return inputVC;
  }
예제 #9
0
파일: PhasingUtils.java 프로젝트: nh13/gatk
  static VariantContext reallyMergeIntoMNP(
      VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
    int startInter = vc1.getEnd() + 1;
    int endInter = vc2.getStart() - 1;
    byte[] intermediateBases = null;
    if (startInter <= endInter) {
      intermediateBases =
          referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases();
      StringUtil.toUpperCase(intermediateBases);
    }
    MergedAllelesData mergeData =
        new MergedAllelesData(
            intermediateBases, vc1, vc2); // ensures that the reference allele is added

    GenotypesContext mergedGenotypes = GenotypesContext.create();
    for (final Genotype gt1 : vc1.getGenotypes()) {
      Genotype gt2 = vc2.getGenotype(gt1.getSampleName());

      List<Allele> site1Alleles = gt1.getAlleles();
      List<Allele> site2Alleles = gt2.getAlleles();

      List<Allele> mergedAllelesForSample = new LinkedList<Allele>();

      /* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order as in the input VC records,
        we preserve phase information (if any) relative to whatever precedes vc1:
      */
      Iterator<Allele> all2It = site2Alleles.iterator();
      for (Allele all1 : site1Alleles) {
        Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()

        Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2);
        mergedAllelesForSample.add(mergedAllele);
      }

      double mergedGQ = Math.max(gt1.getLog10PError(), gt2.getLog10PError());
      Set<String> mergedGtFilters =
          new HashSet<
              String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered

      Map<String, Object> mergedGtAttribs = new HashMap<String, Object>();
      PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2);
      if (phaseQual.PQ != null) mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);

      Genotype mergedGt =
          new Genotype(
              gt1.getSampleName(),
              mergedAllelesForSample,
              mergedGQ,
              mergedGtFilters,
              mergedGtAttribs,
              phaseQual.isPhased);
      mergedGenotypes.add(mergedGt);
    }

    String mergedName = mergeVariantContextNames(vc1.getSource(), vc2.getSource());
    double mergedLog10PError = Math.min(vc1.getLog10PError(), vc2.getLog10PError());
    Set<String> mergedFilters =
        new HashSet<
            String>(); // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered
    Map<String, Object> mergedAttribs = mergeVariantContextAttributes(vc1, vc2);

    // ids
    List<String> mergedIDs = new ArrayList<String>();
    if (vc1.hasID()) mergedIDs.add(vc1.getID());
    if (vc2.hasID()) mergedIDs.add(vc2.getID());
    String mergedID =
        mergedIDs.isEmpty()
            ? VCFConstants.EMPTY_ID_FIELD
            : Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs);

    VariantContextBuilder mergedBuilder =
        new VariantContextBuilder(
                mergedName,
                vc1.getChr(),
                vc1.getStart(),
                vc2.getEnd(),
                mergeData.getAllMergedAlleles())
            .id(mergedID)
            .genotypes(mergedGenotypes)
            .log10PError(mergedLog10PError)
            .filters(mergedFilters)
            .attributes(mergedAttribs);
    VariantContextUtils.calculateChromosomeCounts(mergedBuilder, true);
    return mergedBuilder.make();
  }