Example #1
  private Map<String, Object> calculateIC(final VariantContext vc) {
    final GenotypesContext genotypes =
        (founderIds == null || founderIds.isEmpty())
            ? vc.getGenotypes()
            : vc.getGenotypes(founderIds);
    if (genotypes == null || genotypes.size() < MIN_SAMPLES) return null;

    int idxAA = 0, idxAB = 1, idxBB = 2;

    if (!vc.isBiallelic()) {
      // for the non-biallelic case, run the test on the most common alt allele.
      // Get the corresponding indices into the GL vector to retrieve the GLs of AA, AB and BB.
      int[] idxVector = vc.getGLIndecesOfAlternateAllele(vc.getAltAlleleWithHighestAlleleCount());
      idxAA = idxVector[0];
      idxAB = idxVector[1];
      idxBB = idxVector[2];
    }

    double refCount = 0.0;
    double hetCount = 0.0;
    double homCount = 0.0;
    int N = 0; // number of samples that have likelihoods
    for (final Genotype g : genotypes) {
      if (g.isNoCall() || !g.hasLikelihoods()) continue;

      if (g.getPloidy() != 2) continue; // only works for diploid samples

      N++;
      final double[] normalizedLikelihoods =
          MathUtils.normalizeFromLog10(g.getLikelihoods().getAsVector());
      refCount += normalizedLikelihoods[idxAA];
      hetCount += normalizedLikelihoods[idxAB];
      homCount += normalizedLikelihoods[idxBB];
    }

    if (N < MIN_SAMPLES) {
      return null;
    }

    final double p =
        (2.0 * refCount + hetCount)
            / (2.0 * (refCount + hetCount + homCount)); // expected reference allele frequency
    final double q = 1.0 - p; // expected alternative allele frequency
    final double F = 1.0 - (hetCount / (2.0 * p * q * (double) N)); // inbreeding coefficient

    Map<String, Object> map = new HashMap<String, Object>();
    map.put(getKeyNames().get(0), String.format("%.4f", F));
    return map;
  }
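
The expected-frequency and inbreeding-coefficient arithmetic above can be checked in isolation. Below is a minimal, self-contained sketch of the same formula using made-up soft genotype counts; it uses no GATK types and the numbers are purely illustrative.

public class InbreedingCoeffSketch {
  public static void main(String[] args) {
    // hypothetical soft genotype counts summed over n diploid samples
    double refCount = 60.0, hetCount = 25.0, homCount = 15.0;
    int n = 100;

    // expected reference allele frequency p and alternative allele frequency q
    double p = (2.0 * refCount + hetCount) / (2.0 * (refCount + hetCount + homCount));
    double q = 1.0 - p;

    // F = 1 - (observed het fraction) / (expected het fraction 2pq)
    double f = 1.0 - (hetCount / (2.0 * p * q * n));
    System.out.printf("p=%.4f q=%.4f F=%.4f%n", p, q, f);
  }
}
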
Example #2
  /**
   * Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN,
   * AC, AF).
   *
   * @param vc the VariantContext record to subset
   * @param samples the samples to extract
   * @return the subsetted VariantContext
   */
  private VariantContext subsetRecord(VariantContext vc, Set<String> samples) {
    if (samples == null || samples.isEmpty()) return vc;

    ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
    for (Map.Entry<String, Genotype> genotypePair : vc.getGenotypes().entrySet()) {
      if (samples.contains(genotypePair.getKey())) genotypes.add(genotypePair.getValue());
    }

    VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles());

    // if we have fewer alternate alleles in the selected VC than in the original VC, we need to
    // strip out the GL/PLs (because they are no longer accurate)
    if (vc.getAlleles().size() != sub.getAlleles().size())
      sub = VariantContext.modifyGenotypes(sub, VariantContextUtils.stripPLs(sub.getGenotypes()));

    HashMap<String, Object> attributes = new HashMap<String, Object>(sub.getAttributes());

    int depth = 0;
    for (String sample : sub.getSampleNames()) {
      Genotype g = sub.getGenotype(sample);

      if (g.isNotFiltered() && g.isCalled()) {

        String dp = (String) g.getAttribute("DP");
        if (dp != null
            && !dp.equals(VCFConstants.MISSING_DEPTH_v3)
            && !dp.equals(VCFConstants.MISSING_VALUE_v4)) {
          depth += Integer.valueOf(dp);
        }
      }
    }

    if (KEEP_ORIGINAL_CHR_COUNTS) {
      if (attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY))
        attributes.put("AC_Orig", attributes.get(VCFConstants.ALLELE_COUNT_KEY));
      if (attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY))
        attributes.put("AF_Orig", attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY));
      if (attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY))
        attributes.put("AN_Orig", attributes.get(VCFConstants.ALLELE_NUMBER_KEY));
    }

    VariantContextUtils.calculateChromosomeCounts(sub, attributes, false);
    attributes.put("DP", depth);

    sub = VariantContext.modifyAttributes(sub, attributes);

    return sub;
  }
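
VariantContextUtils.calculateChromosomeCounts recomputes AN, AC and AF for the subset record; the real utility handles multi-allelic sites and missing genotypes, but the core idea reduces to counting called alleles. A dependency-free sketch, with hypothetical diploid genotypes encoded as allele indices (0 = REF, 1 = ALT):

import java.util.Arrays;
import java.util.List;

public class ChromosomeCountsSketch {
  public static void main(String[] args) {
    // genotypes of the retained samples after subsetting (illustrative values)
    List<int[]> genotypes = Arrays.asList(new int[] {0, 1}, new int[] {1, 1}, new int[] {0, 0});

    int an = 0; // AN: total number of called alleles
    int ac = 0; // AC: number of called ALT alleles
    for (int[] g : genotypes) {
      for (int allele : g) {
        an++;
        if (allele == 1) ac++;
      }
    }
    double af = (an == 0) ? 0.0 : (double) ac / an; // AF = AC / AN
    System.out.printf("AN=%d AC=%d AF=%.3f%n", an, ac, af);
  }
}
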
Example #3
  private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
    Set<String> keys = new HashSet<String>();

    boolean sawGoodGT = false;
    boolean sawGoodQual = false;
    boolean sawGenotypeFilter = false;
    for (Genotype g : vc.getGenotypes().values()) {
      keys.addAll(g.getAttributes().keySet());
      if (g.isAvailable()) sawGoodGT = true;
      if (g.hasNegLog10PError()) sawGoodQual = true;
      if (g.isFiltered() && g.isCalled()) sawGenotypeFilter = true;
    }

    if (sawGoodQual) keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);

    if (sawGenotypeFilter) keys.add(VCFConstants.GENOTYPE_FILTER_KEY);

    List<String> sortedList = ParsingUtils.sortList(new ArrayList<String>(keys));

    // make sure the GT is first
    if (sawGoodGT) {
      List<String> newList = new ArrayList<String>(sortedList.size() + 1);
      newList.add(VCFConstants.GENOTYPE_KEY);
      newList.addAll(sortedList);
      sortedList = newList;
    }

    return sortedList;
  }
Example #4
  /**
   * Checks whether vc contains a discordant variant call for at least one of the samples.
   *
   * @param vc the variant rod VariantContext. Here, the variant is the dataset you're looking for
   *     discordances to (e.g. HapMap)
   * @param compVCs the comparison VariantContexts from the track being checked for discordance
   * @return true if at least one sample has a variant call in vc that is not present in any of the
   *     comparison VariantContexts
   */
  private boolean isDiscordant(VariantContext vc, Collection<VariantContext> compVCs) {
    if (vc == null) return false;

    // if we're not looking at specific samples then the absence of a compVC means discordance
    if (NO_SAMPLES_SPECIFIED) return (compVCs == null || compVCs.isEmpty());

    // check if we find it in the variant rod
    Map<String, Genotype> genotypes = vc.getGenotypes(samples);
    for (Genotype g : genotypes.values()) {
      if (sampleHasVariant(g)) {
        // This sample has a variant call (or a filtered call, when the exclude-filtered option is
        // not set) that is not HomRef.
        if (compVCs == null) return true;
        // Look for this sample in all VCs of the comp ROD track.
        boolean foundVariant = false;
        for (VariantContext compVC : compVCs) {
          if (sampleHasVariant(compVC.getGenotype(g.getSampleName()))) {
            foundVariant = true;
            break;
          }
        }
        // if this sample's variant was not found in any VC of the comp ROD, we have discordance
        if (!foundVariant) return true;
      }
    }
    return false; // we only get here if every sample with a variant in vc also has one in the comp rod
  }
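
Boiled down, the decision above is: the site is discordant if at least one tracked sample has a variant call in vc but in none of the comparison records. A toy sketch of that rule with plain booleans and hypothetical sample names (no GATK types):

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DiscordanceSketch {
  static boolean isDiscordant(Map<String, Boolean> evalHasVariant, List<Map<String, Boolean>> compRecords) {
    for (Map.Entry<String, Boolean> entry : evalHasVariant.entrySet()) {
      if (!entry.getValue()) continue; // this sample has no variant call in the eval record
      boolean foundInComp = false;
      for (Map<String, Boolean> comp : compRecords) {
        if (Boolean.TRUE.equals(comp.get(entry.getKey()))) {
          foundInComp = true;
          break;
        }
      }
      if (!foundInComp) return true; // a variant sample absent from every comparison record
    }
    return false;
  }

  public static void main(String[] args) {
    Map<String, Boolean> eval = new HashMap<String, Boolean>();
    eval.put("sampleA", true);  // hypothetical sample with a variant call
    eval.put("sampleB", false);
    Map<String, Boolean> comp = new HashMap<String, Boolean>();
    comp.put("sampleA", false); // no variant for sampleA in the comparison record
    System.out.println(isDiscordant(eval, Arrays.asList(comp))); // prints true
  }
}
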
Example #5
  static boolean allGenotypesAreUnfilteredAndCalled(VariantContext vc) {
    for (final Genotype gt : vc.getGenotypes()) {
      if (gt.isNoCall() || gt.isFiltered()) return false;
    }

    return true;
  }
  /**
   * Returns a context identical to the given one, with the REF and ALT alleles reverse complemented.
   *
   * @param vc variant context
   * @return new vc
   */
  public static VariantContext reverseComplement(VariantContext vc) {
    // create a mapping from original allele to reverse complemented allele
    HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
    for (Allele originalAllele : vc.getAlleles()) {
      Allele newAllele;
      if (originalAllele.isNoCall() || originalAllele.isNull()) newAllele = originalAllele;
      else
        newAllele =
            Allele.create(
                BaseUtils.simpleReverseComplement(originalAllele.getBases()),
                originalAllele.isReference());
      alleleMap.put(originalAllele, newAllele);
    }

    // create new Genotype objects
    GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
    for (final Genotype genotype : vc.getGenotypes()) {
      List<Allele> newAlleles = new ArrayList<Allele>();
      for (Allele allele : genotype.getAlleles()) {
        Allele newAllele = alleleMap.get(allele);
        if (newAllele == null) newAllele = Allele.NO_CALL;
        newAlleles.add(newAllele);
      }
      newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
    }

    return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
  }
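
BaseUtils.simpleReverseComplement comes from the GATK utility classes used above. For readers without the library, a stand-alone version of the same operation might look like the sketch below (toy code assuming only A/C/G/T/N bases; not the GATK implementation):

public class ReverseComplementSketch {
  // reverse complement of a plain base string
  static String reverseComplement(String bases) {
    StringBuilder sb = new StringBuilder(bases.length());
    for (int i = bases.length() - 1; i >= 0; i--) {
      switch (Character.toUpperCase(bases.charAt(i))) {
        case 'A': sb.append('T'); break;
        case 'C': sb.append('G'); break;
        case 'G': sb.append('C'); break;
        case 'T': sb.append('A'); break;
        default:  sb.append('N'); break;
      }
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    System.out.println(reverseComplement("ACCGT")); // prints ACGGT
  }
}
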
Example #7
  protected final void printCallInfo(
      final VariantContext vc,
      final double[] log10AlleleFrequencyPriors,
      final long runtimeNano,
      final AFCalcResult result) {
    printCallElement(vc, "type", "ignore", vc.getType());

    int allelei = 0;
    for (final Allele a : vc.getAlleles())
      printCallElement(vc, "allele", allelei++, a.getDisplayString());

    for (final Genotype g : vc.getGenotypes())
      printCallElement(vc, "PL", g.getSampleName(), g.getLikelihoodsString());

    for (int priorI = 0; priorI < log10AlleleFrequencyPriors.length; priorI++)
      printCallElement(vc, "priorI", priorI, log10AlleleFrequencyPriors[priorI]);

    printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
    printCallElement(vc, "log10PosteriorOfAFEq0", "ignore", result.getLog10PosteriorOfAFEq0());
    printCallElement(vc, "log10PosteriorOfAFGt0", "ignore", result.getLog10PosteriorOfAFGT0());

    for (final Allele allele : result.getAllelesUsedInGenotyping()) {
      if (allele.isNonReference()) {
        printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele));
        printCallElement(
            vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele));
      }
    }

    callReport.flush();
  }
  private static void mergeGenotypes(
      GenotypesContext mergedGenotypes,
      VariantContext oneVC,
      AlleleMapper alleleMapping,
      boolean uniqifySamples) {
    for (Genotype g : oneVC.getGenotypes()) {
      String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples);
      if (!mergedGenotypes.containsSample(name)) {
        // only add if the name is new
        Genotype newG = g;

        if (uniqifySamples || alleleMapping.needsRemapping()) {
          final List<Allele> alleles =
              alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles();
          newG =
              new Genotype(
                  name,
                  alleles,
                  g.getLog10PError(),
                  g.getFilters(),
                  g.getAttributes(),
                  g.isPhased());
        }

        mergedGenotypes.add(newG);
      }
    }
  }
Example #9
  public Map<String, Object> annotate(
      RefMetaDataTracker tracker,
      AnnotatorCompatible walker,
      ReferenceContext ref,
      Map<String, AlignmentContext> stratifiedContexts,
      VariantContext vc) {
    // a size of 0 means that the call was made by someone else and we have no data here
    if (stratifiedContexts.size() == 0) return null;

    if (!vc.isSNP() && !vc.isIndel() && !vc.isMixed()) return null;

    final AlignmentContext context =
        AlignmentContextUtils.joinContexts(stratifiedContexts.values());

    final int contextWingSize = Math.min((ref.getWindow().size() - 1) / 2, MIN_CONTEXT_WING_SIZE);
    final int contextSize = contextWingSize * 2 + 1;

    final int locus =
        ref.getLocus().getStart() + (ref.getLocus().getStop() - ref.getLocus().getStart()) / 2;

    final ReadBackedPileup pileup = context.getBasePileup();

    // Compute all haplotypes consistent with the current read pileup
    final List<Haplotype> haplotypes = computeHaplotypes(pileup, contextSize, locus, vc);

    final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage();
    if (haplotypes != null) {
      for (final Genotype genotype : vc.getGenotypes()) {
        final AlignmentContext thisContext = stratifiedContexts.get(genotype.getSampleName());
        if (thisContext != null) {
          final ReadBackedPileup thisPileup = thisContext.getBasePileup();
          // take the simple average of all samples' scores, since the score can be negative and
          // the RMS doesn't make sense
          if (vc.isSNP()) {
            scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus));
          } else if (vc.isIndel() || vc.isMixed()) {
            Double d = scoreIndelsAgainstHaplotypes(thisPileup);
            if (d == null) return null;
            scoreRA.add(d);
          }
        }
      }
    }

    // annotate the score in the info field
    final Map<String, Object> map = new HashMap<String, Object>();
    map.put(getKeyNames().get(0), String.format("%.4f", scoreRA.mean()));
    return map;
  }
Example #10
  static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) {
    // Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1:
    for (final Genotype gt1 : vc1.getGenotypes()) {
      Genotype gt2 = vc2.getGenotype(gt1.getSampleName());

      // can merge if: phased, or if either is a hom
      if (!alleleSegregationIsKnown(gt1, gt2)) return false;
    }

    return true;
  }
  public static VariantContext purgeUnallowedGenotypeAttributes(
      VariantContext vc, Set<String> allowedAttributes) {
    if (allowedAttributes == null) return vc;

    GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
    for (final Genotype genotype : vc.getGenotypes()) {
      Map<String, Object> attrs = new HashMap<String, Object>();
      for (Map.Entry<String, Object> attr : genotype.getAttributes().entrySet()) {
        if (allowedAttributes.contains(attr.getKey())) attrs.put(attr.getKey(), attr.getValue());
      }
      newGenotypes.add(Genotype.modifyAttributes(genotype, attrs));
    }

    return new VariantContextBuilder(vc).genotypes(newGenotypes).make();
  }
Example #12
 /**
  * Copy constructor
  *
  * @param other the VariantContext to copy
  */
 protected VariantContext(VariantContext other) {
   this(
       other.getSource(),
       other.getID(),
       other.getChr(),
       other.getStart(),
       other.getEnd(),
       other.getAlleles(),
       other.getGenotypes(),
       other.getLog10PError(),
       other.getFiltersMaybeNull(),
       other.getAttributes(),
       other.REFERENCE_BASE_FOR_INDEL,
       NO_VALIDATION);
 }
Example #13
  static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
    for (final Genotype gt1 : vc1.getGenotypes()) {
      Genotype gt2 = vc2.getGenotype(gt1.getSampleName());

      List<Allele> site1Alleles = gt1.getAlleles();
      List<Allele> site2Alleles = gt2.getAlleles();

      Iterator<Allele> all2It = site2Alleles.iterator();
      for (Allele all1 : site1Alleles) {
        Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()

        // corresponding alleles are both alternate
        if (all1.isNonReference() && all2.isNonReference()) return true;
      }
    }

    return false;
  }
Example #14
  public void update2(
      VariantContext eval,
      VariantContext comp,
      RefMetaDataTracker tracker,
      ReferenceContext ref,
      AlignmentContext context) {
    if (eval == null || (getWalker().ignoreAC0Sites() && eval.isMonomorphicInSamples())) return;

    final Type type = getType(eval);
    if (type == null) return;

    TypeSampleMap titvTable = null;

    // update DP, if possible
    if (eval.hasAttribute(VCFConstants.DEPTH_KEY)) depthPerSample.inc(type, ALL);

    // update counts
    allVariantCounts.inc(type, ALL);

    // type specific calculations
    if (type == Type.SNP && eval.isBiallelic()) {
      titvTable =
          VariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample;
      titvTable.inc(type, ALL);
    }

    // novelty calculation
    if (comp != null || (type == Type.CNV && overlapsKnownCNV(eval)))
      knownVariantCounts.inc(type, ALL);

    // per sample metrics
    for (final Genotype g : eval.getGenotypes()) {
      if (!g.isNoCall() && !g.isHomRef()) {
        countsPerSample.inc(type, g.getSampleName());

        // update transition / transversion ratio
        if (titvTable != null) titvTable.inc(type, g.getSampleName());

        if (g.hasDP()) depthPerSample.inc(type, g.getSampleName());
      }
    }
  }
  @Test(dataProvider = "mergeGenotypes")
  public void testMergeGenotypes(MergeGenotypesTest cfg) {
    final VariantContext merged =
        VariantContextUtils.simpleMerge(
            genomeLocParser,
            cfg.inputs,
            cfg.priority,
            VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
            VariantContextUtils.GenotypeMergeType.PRIORITIZE,
            true,
            false,
            "set",
            false,
            false);

    // test alleles are equal
    Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());

    // test genotypes
    assertGenotypesAreMostlyEqual(merged.getGenotypes(), cfg.expected.getGenotypes());
  }
Example #16
  static boolean doubleAllelesSegregatePerfectlyAmongSamples(
      VariantContext vc1, VariantContext vc2) {
    // Check that Alleles at vc1 and at vc2 always segregate together in all samples (including
    // reference):
    Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>();
    Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>();

    // Note the segregation of the alleles for the reference genome:
    allele1ToAllele2.put(vc1.getReference(), vc2.getReference());
    allele2ToAllele1.put(vc2.getReference(), vc1.getReference());

    // Note the segregation of the alleles for each sample (and check that it is consistent with the
    // reference and all previous samples).
    for (final Genotype gt1 : vc1.getGenotypes()) {
      Genotype gt2 = vc2.getGenotype(gt1.getSampleName());

      List<Allele> site1Alleles = gt1.getAlleles();
      List<Allele> site2Alleles = gt2.getAlleles();

      Iterator<Allele> all2It = site2Alleles.iterator();
      for (Allele all1 : site1Alleles) {
        Allele all2 = all2It.next();

        Allele all1To2 = allele1ToAllele2.get(all1);
        if (all1To2 == null) allele1ToAllele2.put(all1, all2);
        else if (!all1To2.equals(all2))
          return false; // all1 segregates with two different alleles at site 2

        Allele all2To1 = allele2ToAllele1.get(all2);
        if (all2To1 == null) allele2ToAllele1.put(all2, all1);
        else if (!all2To1.equals(all1))
          return false; // all2 segregates with two different alleles at site 1
      }
    }

    return true;
  }
  public static VariantContextBuilder pruneVariantContext(
      final VariantContextBuilder builder, Collection<String> keysToPreserve) {
    final VariantContext vc = builder.make();
    if (keysToPreserve == null) keysToPreserve = Collections.emptyList();

    // VC info
    final Map<String, Object> attributes = subsetAttributes(vc.commonInfo, keysToPreserve);

    // Genotypes
    final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
    for (final Genotype g : vc.getGenotypes()) {
      Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve);
      genotypes.add(
          new Genotype(
              g.getSampleName(),
              g.getAlleles(),
              g.getLog10PError(),
              g.getFilters(),
              genotypeAttributes,
              g.isPhased()));
    }

    return builder.genotypes(genotypes).attributes(attributes);
  }
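
The two maps in doubleAllelesSegregatePerfectlyAmongSamples enforce a one-to-one pairing between site-1 and site-2 alleles. The same bijection check, stripped of the GATK types and run on made-up allele pairs:

import java.util.HashMap;
import java.util.Map;

public class SegregationSketch {
  // returns true if the pairing of site-1 alleles to site-2 alleles is one-to-one across all observations
  static boolean segregatePerfectly(String[][] pairs) {
    Map<String, String> oneToTwo = new HashMap<String, String>();
    Map<String, String> twoToOne = new HashMap<String, String>();
    for (String[] pair : pairs) {
      String a1 = pair[0], a2 = pair[1];
      String mapped2 = oneToTwo.get(a1);
      if (mapped2 == null) oneToTwo.put(a1, a2);
      else if (!mapped2.equals(a2)) return false; // a1 seen with two different partners
      String mapped1 = twoToOne.get(a2);
      if (mapped1 == null) twoToOne.put(a2, a1);
      else if (!mapped1.equals(a1)) return false; // a2 seen with two different partners
    }
    return true;
  }

  public static void main(String[] args) {
    // hypothetical per-chromosome allele pairs observed at two neighboring sites
    String[][] consistent = {{"A", "G"}, {"C", "T"}, {"A", "G"}};
    String[][] broken = {{"A", "G"}, {"A", "T"}};
    System.out.println(segregatePerfectly(consistent)); // true
    System.out.println(segregatePerfectly(broken));     // false
  }
}
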
Example #18
  static VariantContext reallyMergeIntoMNP(
      VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
    int startInter = vc1.getEnd() + 1;
    int endInter = vc2.getStart() - 1;
    byte[] intermediateBases = null;
    if (startInter <= endInter) {
      intermediateBases =
          referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases();
      StringUtil.toUpperCase(intermediateBases);
    }
    MergedAllelesData mergeData =
        new MergedAllelesData(
            intermediateBases, vc1, vc2); // ensures that the reference allele is added

    GenotypesContext mergedGenotypes = GenotypesContext.create();
    for (final Genotype gt1 : vc1.getGenotypes()) {
      Genotype gt2 = vc2.getGenotype(gt1.getSampleName());

      List<Allele> site1Alleles = gt1.getAlleles();
      List<Allele> site2Alleles = gt2.getAlleles();

      List<Allele> mergedAllelesForSample = new LinkedList<Allele>();

      /* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order as in the input VC records,
        we preserve phase information (if any) relative to whatever precedes vc1:
      */
      Iterator<Allele> all2It = site2Alleles.iterator();
      for (Allele all1 : site1Alleles) {
        Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable()

        Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2);
        mergedAllelesForSample.add(mergedAllele);
      }

      double mergedGQ = Math.max(gt1.getLog10PError(), gt2.getLog10PError());
      // Since gt1 and gt2 were unfiltered, the merged Genotype remains unfiltered
      Set<String> mergedGtFilters = new HashSet<String>();

      Map<String, Object> mergedGtAttribs = new HashMap<String, Object>();
      PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2);
      if (phaseQual.PQ != null) mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);

      Genotype mergedGt =
          new Genotype(
              gt1.getSampleName(),
              mergedAllelesForSample,
              mergedGQ,
              mergedGtFilters,
              mergedGtAttribs,
              phaseQual.isPhased);
      mergedGenotypes.add(mergedGt);
    }

    String mergedName = mergeVariantContextNames(vc1.getSource(), vc2.getSource());
    double mergedLog10PError = Math.min(vc1.getLog10PError(), vc2.getLog10PError());
    // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered
    Set<String> mergedFilters = new HashSet<String>();
    Map<String, Object> mergedAttribs = mergeVariantContextAttributes(vc1, vc2);

    // ids
    List<String> mergedIDs = new ArrayList<String>();
    if (vc1.hasID()) mergedIDs.add(vc1.getID());
    if (vc2.hasID()) mergedIDs.add(vc2.getID());
    String mergedID =
        mergedIDs.isEmpty()
            ? VCFConstants.EMPTY_ID_FIELD
            : Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs);

    VariantContextBuilder mergedBuilder =
        new VariantContextBuilder(
                mergedName,
                vc1.getChr(),
                vc1.getStart(),
                vc2.getEnd(),
                mergeData.getAllMergedAlleles())
            .id(mergedID)
            .genotypes(mergedGenotypes)
            .log10PError(mergedLog10PError)
            .filters(mergedFilters)
            .attributes(mergedAttribs);
    VariantContextUtils.calculateChromosomeCounts(mergedBuilder, true);
    return mergedBuilder.make();
  }
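
At the allele level, the MNP merge amounts to concatenating the site-1 bases, any intermediate reference bases, and the site-2 bases; MergedAllelesData presumably also caches the merged alleles and guarantees the merged reference is present. A string-only sketch with made-up bases:

public class MnpAlleleMergeSketch {
  public static void main(String[] args) {
    // hypothetical neighboring SNPs: site 1 REF=A/ALT=G, site 2 REF=C/ALT=T,
    // with a single intermediate reference base "T" between them
    String intermediate = "T";
    String mergedRef = "A" + intermediate + "C"; // ATC
    String mergedAlt = "G" + intermediate + "T"; // GTT
    System.out.println("merged REF: " + mergedRef);
    System.out.println("merged ALT: " + mergedAlt);
  }
}
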
Example #19
  public void writeBeagleOutput(
      VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) {
    GenomeLoc currentLoc =
        VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), preferredVC);
    StringBuffer beagleOut = new StringBuffer();

    String marker = String.format("%s:%d ", currentLoc.getContig(), currentLoc.getStart());
    beagleOut.append(marker);
    if (markers != null)
      markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
    for (Allele allele : preferredVC.getAlleles()) {
      String bglPrintString;
      if (allele.isNoCall() || allele.isNull()) bglPrintString = "-";
      else bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele

      beagleOut.append(String.format("%s ", bglPrintString));
      if (markers != null) markers.append(bglPrintString).append("\t");
    }
    if (markers != null) markers.append("\n");

    GenotypesContext preferredGenotypes = preferredVC.getGenotypes();
    GenotypesContext otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
    for (String sample : samples) {
      boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE;

      Genotype genotype;
      boolean isValidation;
      // use sample as key into genotypes structure
      if (preferredGenotypes.containsSample(sample)) {
        genotype = preferredGenotypes.get(sample);
        isValidation = isValidationSite;
      } else if (otherGenotypes != null && otherGenotypes.containsSample(sample)) {
        genotype = otherGenotypes.get(sample);
        isValidation = !isValidationSite;
      } else {
        // there is magically no genotype for this sample.
        throw new StingException(
            "Sample "
                + sample
                + " arose with no genotype in variant or validation VCF. This should never happen.");
      }

      /*
       * Use the genotype's own likelihoods if this is a validation site with a negative prior,
       * or if the genotype carries likelihoods.
       */
      double[] log10Likelihoods = null;
      if ((isValidation && prior < 0.0) || genotype.hasLikelihoods()) {
        log10Likelihoods = genotype.getLikelihoods().getAsVector();

        // see if we need to randomly mask out genotype in this position.
        if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() <= insertedNoCallRate) {
          // we are masking out this genotype
          log10Likelihoods =
              isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS;
        }

        if (isMaleOnChrX) {
          log10Likelihoods[1] = -255; // todo -- warning this is dangerous for multi-allele case
        }
      }
      // otherwise, derive the likelihoods from the prior
      else if (!isValidation && genotype.isCalled() && !genotype.hasLikelihoods()) {
        // hack to deal with input VCFs that carry no genotype likelihoods: just assume the called
        // genotype is confident. This is useful for HapMap and 1KG release VCFs. (See the
        // stand-alone sketch after this method.)
        double AA = (1.0 - prior) / 2.0;
        double AB = (1.0 - prior) / 2.0;
        double BB = (1.0 - prior) / 2.0;

        if (genotype.isHomRef()) {
          AA = prior;
        } else if (genotype.isHet()) {
          AB = prior;
        } else if (genotype.isHomVar()) {
          BB = prior;
        }

        log10Likelihoods = MathUtils.toLog10(new double[] {AA, isMaleOnChrX ? 0.0 : AB, BB});
      } else {
        log10Likelihoods =
            isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS;
      }

      writeSampleLikelihoods(beagleOut, preferredVC, log10Likelihoods);
    }

    beagleWriter.println(beagleOut.toString());
  }
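
When the input VCF carries calls but no likelihoods, the code above fabricates a likelihood vector from the prior. A stand-alone sketch of that arithmetic, assuming MathUtils.toLog10 is an element-wise log10 and using an arbitrary example prior of 0.96 for a het call:

public class PriorLikelihoodSketch {
  public static void main(String[] args) {
    double prior = 0.96; // hypothetical confidence in the called genotype
    double aa = (1.0 - prior) / 2.0;
    double ab = (1.0 - prior) / 2.0;
    double bb = (1.0 - prior) / 2.0;

    // suppose the called genotype is het: it receives the prior mass, so the three values sum to 1
    ab = prior;

    // element-wise log10, as MathUtils.toLog10 is assumed to do
    System.out.printf("log10 likelihoods: %.3f %.3f %.3f%n",
        Math.log10(aa), Math.log10(ab), Math.log10(bb));
  }
}
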
  public static VariantContext createVariantContextWithPaddedAlleles(
      VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
    // see if we need to pad common reference base from all alleles
    boolean padVC;

    // We need to pad a VC with a common base if the length of the reference allele is less than the
    // length of the VariantContext.
    // This happens because the position of e.g. an indel is always one before the actual event (as
    // per VCF convention).
    long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1;
    if (inputVC.hasSymbolicAlleles()) padVC = true;
    else if (inputVC.getReference().length() == locLength) padVC = false;
    else if (inputVC.getReference().length() == locLength - 1) padVC = true;
    else
      throw new IllegalArgumentException(
          "Badly formed variant context at location "
              + String.valueOf(inputVC.getStart())
              + " in contig "
              + inputVC.getChr()
              + ". Reference length must be at most one base shorter than location size");

    // nothing to do if we don't need to pad bases
    if (padVC) {
      if (!inputVC.hasReferenceBaseForIndel())
        throw new ReviewedStingException(
            "Badly formed variant context at location "
                + inputVC.getChr()
                + ":"
                + inputVC.getStart()
                + "; no padded reference base is available.");

      Byte refByte = inputVC.getReferenceBaseForIndel();

      List<Allele> alleles = new ArrayList<Allele>();

      for (Allele a : inputVC.getAlleles()) {
        // get bases for the current allele and create a new one with the padding reference base added
        if (a.isSymbolic()) {
          alleles.add(a);
        } else {
          String newBases;
          if (refBaseShouldBeAppliedToEndOfAlleles)
            newBases = a.getBaseString() + new String(new byte[] {refByte});
          else newBases = new String(new byte[] {refByte}) + a.getBaseString();
          alleles.add(Allele.create(newBases, a.isReference()));
        }
      }

      // now we can recreate new genotypes with trimmed alleles
      GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
      for (final Genotype g : inputVC.getGenotypes()) {
        List<Allele> inAlleles = g.getAlleles();
        List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
        for (Allele a : inAlleles) {
          if (a.isCalled()) {
            if (a.isSymbolic()) {
              newGenotypeAlleles.add(a);
            } else {
              String newBases;
              if (refBaseShouldBeAppliedToEndOfAlleles)
                newBases = a.getBaseString() + new String(new byte[] {refByte});
              else newBases = new String(new byte[] {refByte}) + a.getBaseString();
              newGenotypeAlleles.add(Allele.create(newBases, a.isReference()));
            }
          } else {
            // add no-call allele
            newGenotypeAlleles.add(Allele.NO_CALL);
          }
        }
        genotypes.add(
            new Genotype(
                g.getSampleName(),
                newGenotypeAlleles,
                g.getLog10PError(),
                g.getFilters(),
                g.getAttributes(),
                g.isPhased()));
      }

      return new VariantContextBuilder(inputVC).alleles(alleles).genotypes(genotypes).make();
    } else return inputVC;
  }
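
The padding logic boils down to prepending (or appending) the single reference base to every non-symbolic allele string. A minimal sketch with a hypothetical padding base and alleles:

public class PadAllelesSketch {
  public static void main(String[] args) {
    // hypothetical deletion: padding reference base 'A', REF allele "TC", ALT allele empty
    byte refByte = 'A';
    boolean refBaseAtEnd = false; // mirrors refBaseShouldBeAppliedToEndOfAlleles

    String[] alleles = {"TC", ""};
    for (String a : alleles) {
      String padded = refBaseAtEnd
          ? a + new String(new byte[] {refByte})
          : new String(new byte[] {refByte}) + a;
      System.out.println("'" + a + "' -> '" + padded + "'"); // 'TC' -> 'ATC', '' -> 'A'
    }
  }
}
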
Example #21
  public String update1(
      VariantContext vc1,
      RefMetaDataTracker tracker,
      ReferenceContext ref,
      AlignmentContext context) {
    nCalledLoci++;

    // Note from Eric:
    // This is really not correct. What we really want here is a polymorphic vs. monomorphic count
    // (i.e. on the Genotypes). So in order to maintain consistency with the previous implementation
    // (and the intention of the original author), I've added in a proxy check for monomorphic
    // status here.
    // Protect against the case when vc has only no-calls too - this can happen if we stratify by
    // sample and the sample has a single no-call.
    if (vc1.isMonomorphicInSamples()) {
      nRefLoci++;
    } else {
      switch (vc1.getType()) {
        case NO_VARIATION:
          // shouldn't get here
          break;
        case SNP:
          nVariantLoci++;
          nSNPs++;
          if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
          break;
        case MNP:
          nVariantLoci++;
          nMNPs++;
          if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
          break;
        case INDEL:
          nVariantLoci++;
          if (vc1.isSimpleInsertion()) nInsertions++;
          else if (vc1.isSimpleDeletion()) nDeletions++;
          else nComplex++;
          break;
        case MIXED:
          nVariantLoci++;
          nMixed++;
          break;
        case SYMBOLIC:
          nSymbolic++;
          break;
        default:
          throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType());
      }
    }

    String refStr = vc1.getReference().getBaseString().toUpperCase();

    String aaStr =
        vc1.hasAttribute("ANCESTRALALLELE")
            ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase()
            : null;
    //        if (aaStr.equals(".")) {
    //            aaStr = refStr;
    //        }

    // ref  aa  alt  class
    // A    C   A    der homozygote
    // A    C   C    anc homozygote

    // A    A   A    ref homozygote
    // A    A   C
    // A    C   A
    // A    C   C

    for (final Genotype g : vc1.getGenotypes()) {
      final String altStr =
          vc1.getAlternateAlleles().size() > 0
              ? vc1.getAlternateAllele(0).getBaseString().toUpperCase()
              : null;

      switch (g.getType()) {
        case NO_CALL:
          nNoCalls++;
          break;
        case HOM_REF:
          nHomRef++;

          if (aaStr != null && altStr != null && !refStr.equalsIgnoreCase(aaStr)) {
            nHomDerived++;
          }

          break;
        case HET:
          nHets++;
          break;
        case HOM_VAR:
          nHomVar++;

          if (aaStr != null && altStr != null && !altStr.equalsIgnoreCase(aaStr)) {
            nHomDerived++;
          }

          break;
        case MIXED:
          break;
        default:
          throw new ReviewedStingException("BUG: Unexpected genotype type: " + g);
      }
    }

    return null; // we don't capture any interesting sites
  }
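
The ancestral-allele bookkeeping above classifies a homozygote as derived when its allele differs from the ancestral allele. A tiny sketch of that classification for a hypothetical site (REF=A, ALT=C, ancestral allele C):

public class DerivedHomozygoteSketch {
  public static void main(String[] args) {
    String ref = "A", alt = "C", ancestral = "C"; // hypothetical site
    // a hom-ref genotype is a derived homozygote when REF differs from the ancestral allele
    boolean homRefIsDerived = !ref.equalsIgnoreCase(ancestral);
    // a hom-var genotype is a derived homozygote when ALT differs from the ancestral allele
    boolean homVarIsDerived = !alt.equalsIgnoreCase(ancestral);
    System.out.println("hom-ref derived: " + homRefIsDerived); // true
    System.out.println("hom-var derived: " + homVarIsDerived); // false
  }
}
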
  public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
    // see if we need to trim common reference base from all alleles
    boolean trimVC;

    // We need to trim common reference base from all alleles in all genotypes if a ref base is
    // common to all alleles
    Allele refAllele = inputVC.getReference();
    if (!inputVC.isVariant()) trimVC = false;
    else if (refAllele.isNull()) trimVC = false;
    else {
      trimVC =
          (AbstractVCFCodec.computeForwardClipping(
                  new ArrayList<Allele>(inputVC.getAlternateAlleles()),
                  inputVC.getReference().getDisplayString())
              > 0);
    }

    // nothing to do if we don't need to trim bases
    if (trimVC) {
      List<Allele> alleles = new ArrayList<Allele>();
      GenotypesContext genotypes = GenotypesContext.create();

      // set the reference base for indels in the attributes
      Map<String, Object> attributes = new TreeMap<String, Object>(inputVC.getAttributes());

      Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();

      for (Allele a : inputVC.getAlleles()) {
        if (a.isSymbolic()) {
          alleles.add(a);
          originalToTrimmedAlleleMap.put(a, a);
        } else {
          // get bases for current allele and create a new one with trimmed bases
          byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
          Allele trimmedAllele = Allele.create(newBases, a.isReference());
          alleles.add(trimmedAllele);
          originalToTrimmedAlleleMap.put(a, trimmedAllele);
        }
      }

      // detect the case where we're trimming bases but the resulting vc doesn't have any null
      // allele; in that case, we keep the original representation.
      // Example: mixed records such as {TA*, TGA, TG}
      boolean hasNullAlleles = false;

      for (Allele a : originalToTrimmedAlleleMap.values()) {
        if (a.isNull()) hasNullAlleles = true;
        if (a.isReference()) refAllele = a;
      }

      if (!hasNullAlleles) return inputVC;
      // now we can recreate new genotypes with trimmed alleles
      for (final Genotype genotype : inputVC.getGenotypes()) {

        List<Allele> originalAlleles = genotype.getAlleles();
        List<Allele> trimmedAlleles = new ArrayList<Allele>();
        for (Allele a : originalAlleles) {
          if (a.isCalled()) trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
          else trimmedAlleles.add(Allele.NO_CALL);
        }
        genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
      }

      final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
      return builder
          .alleles(alleles)
          .genotypes(genotypes)
          .attributes(attributes)
          .referenceBaseForIndel(Byte.valueOf(inputVC.getReference().getBases()[0]))
          .make();
    }

    return inputVC;
  }
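
Trimming removes the shared leading reference base from every non-symbolic allele, which is what the Arrays.copyOfRange(bases, 1, a.length()) call above does; the allele that becomes empty is the null allele the method checks for. A string-only sketch with hypothetical padded indel alleles:

import java.util.Arrays;

public class TrimAllelesSketch {
  public static void main(String[] args) {
    // hypothetical padded indel alleles sharing a leading reference base: REF="AT", ALT="A"
    String[] padded = {"AT", "A"};
    String[] trimmed = new String[padded.length];
    for (int i = 0; i < padded.length; i++) {
      // drop the first base from every allele
      trimmed[i] = padded[i].substring(1);
    }
    System.out.println(Arrays.toString(trimmed)); // [T, ] : the empty string is the "null" allele
  }
}
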