/**
   * Tests whether the alleles of {@code vc1} are contained in the alleles of {@code vc2}.
   *
   * @param vc1 the context whose alleles are looked up
   * @param vc2 the context whose alleles are searched
   * @return true if both contexts have equal reference alleles and every alternate allele of
   *     {@code vc1} is also an alternate allele of {@code vc2}; false otherwise
   */
  public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
    // contexts with different reference alleles can never be in a subset relation
    if (!vc1.getReference().equals(vc2.getReference())) {
      return false;
    }

    // every alternate allele of vc1 has to show up among the alternate alleles of vc2
    return vc2.getAlternateAlleles().containsAll(vc1.getAlternateAlleles());
  }
  /**
   * Update the attributes map given the VariantContext to reflect the proper chromosome-based VCF
   * tags (allele number, allele count and allele frequency).
   *
   * <p>If no chromosomes are called and {@code removeStaleValues} is set, the three tags are
   * removed from the map and nothing else is written. Otherwise the tags are written only when the
   * context has genotypes. With a single alternate allele the count and frequency are stored as
   * scalars; with several they are stored as lists in alternate-allele order.
   *
   * @param vc the VariantContext
   * @param attributes the attributes map to populate; must not be null; may contain old values
   * @param removeStaleValues should we remove stale values from the mapping?
   * @return the attributes map provided as input, returned for programming convenience
   */
  public static Map<String, Object> calculateChromosomeCounts(
      VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) {
    final int AN = vc.getCalledChrCount();

    // if everyone is a no-call, remove the old attributes if requested
    if (AN == 0 && removeStaleValues) {
      // Map.remove is a no-op for absent keys, so no containsKey guard is needed
      attributes.remove(VCFConstants.ALLELE_COUNT_KEY);
      attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY);
      attributes.remove(VCFConstants.ALLELE_NUMBER_KEY);
      return attributes;
    }

    // without genotypes there is nothing to count
    if (!vc.hasGenotypes()) {
      return attributes;
    }

    attributes.put(VCFConstants.ALLELE_NUMBER_KEY, AN);

    // no alternate alleles: the count and frequency are trivially zero
    if (vc.getAlternateAlleles().isEmpty()) {
      attributes.put(VCFConstants.ALLELE_COUNT_KEY, 0);
      attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0);
      return attributes;
    }

    // The format string depends only on AN, so it is built once instead of once per allele.
    // NOTE(review): assumes makePrecisionFormatStringFromDenominatorValue is a pure function of
    // its argument -- confirm.
    final String freqFormat =
        AN == 0 ? null : makePrecisionFormatStringFromDenominatorValue((double) AN);

    final ArrayList<String> alleleFreqs = new ArrayList<String>();
    final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
    for (Allele allele : vc.getAlternateAlleles()) {
      final int altChromosomes = vc.getCalledChrCount(allele);
      alleleCounts.add(altChromosomes);
      if (AN == 0) {
        // avoid dividing by zero when nothing is called
        alleleFreqs.add("0.0");
      } else {
        alleleFreqs.add(String.format(freqFormat, ((double) altChromosomes / (double) AN)));
      }
    }

    // a single alternate allele is recorded as a scalar rather than a one-element list
    attributes.put(
        VCFConstants.ALLELE_COUNT_KEY,
        alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
    attributes.put(
        VCFConstants.ALLELE_FREQUENCY_KEY,
        alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);

    return attributes;
  }
  /**
   * Classifies how well {@code comp} matches {@code eval}.
   *
   * <p>Contexts of different types never match. Otherwise the match is strict when neither
   * context has an alternate allele, or when the first alternate alleles and the reference alleles
   * of both contexts are equal. Only the first alternate allele of each context is inspected.
   *
   * @param eval the evaluation context
   * @param comp the comparison context
   * @param requireStrictAlleleMatch whether a non-strict match should be reported as no match
   * @return {@code STRICT}, {@code LENIENT} or {@code NO_MATCH} as described above
   */
  @Requires({"eval != null", "comp != null"})
  private EvalCompMatchType doEvalAndCompMatch(
      final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
    // variants of different types can never match
    if (comp.getType() != eval.getType()) {
      return EvalCompMatchType.NO_MATCH;
    }

    // pick the first alternate allele of each context, or null when there is none
    final Allele firstEvalAlt =
        eval.getAlternateAlleles().isEmpty() ? null : eval.getAlternateAllele(0);
    final Allele firstCompAlt =
        comp.getAlternateAlleles().isEmpty() ? null : comp.getAlternateAllele(0);

    // two contexts without any alternate allele match strictly
    if (firstEvalAlt == null && firstCompAlt == null) {
      return EvalCompMatchType.STRICT;
    }

    // otherwise both the alternate and the reference allele have to agree
    final boolean sameAltAndRef =
        firstEvalAlt != null
            && firstEvalAlt.equals(firstCompAlt)
            && eval.getReference().equals(comp.getReference());
    if (sameAltAndRef) {
      return EvalCompMatchType.STRICT;
    }

    if (requireStrictAlleleMatch) {
      return EvalCompMatchType.NO_MATCH;
    }
    return EvalCompMatchType.LENIENT;
  }
Ejemplo n.º 4
0
  /**
   * Reports whether two contexts differ in their alternate alleles, ignoring order.
   *
   * @param eval the evaluation context
   * @param comp the comparison context
   * @return true if the number of alternate alleles differs or some alternate allele of
   *     {@code eval} is absent from {@code comp}; false otherwise
   */
  private boolean haveDifferentAltAlleles(VariantContext eval, VariantContext comp) {
    final Collection<Allele> evalAlleles = eval.getAlternateAlleles();
    final Collection<Allele> compAlleles = comp.getAlternateAlleles();

    // with equal sizes the contexts differ only if some eval alt is missing from comp
    return evalAlleles.size() != compAlleles.size() || !compAlleles.containsAll(evalAlleles);
  }
  /**
   * Builds the depth and per-alternate-allele read-fraction annotations for a SNP site.
   *
   * <p>The total pileup depth is stored under the first key of {@code getKeyNames()}. When at least
   * one read has non-zero mapping quality, an array of fractions (one per alternate allele, in
   * alternate-allele order, formatted with three decimals) is stored under the second key. Each
   * fraction is the number of non-MQ0 reads whose base equals the first base of the alternate
   * allele, divided by the number of non-MQ0 reads.
   *
   * @param stratifiedContext the alignment context providing the base pileup
   * @param vc the variant context whose alternate alleles are counted
   * @return null if the context has no base pileup; otherwise the annotation map, which contains
   *     only the depth when the depth is zero or every read has mapping quality zero
   */
  private Map<String, Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) {

    if (!stratifiedContext.hasBasePileup()) {
      return null;
    }

    // one counter per alternate allele, keyed by the allele's first base
    final HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
    for (Allele allele : vc.getAlternateAlleles()) {
      alleleCounts.put(allele.getBases()[0], 0);
    }

    final ReadBackedPileup pileup = stratifiedContext.getBasePileup();
    final int totalDepth = pileup.size();

    final Map<String, Object> map = new HashMap<String, Object>();
    map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

    if (totalDepth == 0) {
      return map; // done, can not compute fractions at 0 coverage
    }

    int mq0 = 0; // number of reads with mapping quality zero; excluded from the fractions
    for (PileupElement p : pileup) {
      if (p.getMappingQual() == 0) {
        mq0++;
        continue;
      }
      // non-mq0 read: count it only if its base is one of the tracked alternate bases
      final Integer seenSoFar = alleleCounts.get(p.getBase());
      if (seenSoFar != null) {
        alleleCounts.put(p.getBase(), seenSoFar + 1);
      }
    }

    if (mq0 == totalDepth) {
      return map; // if all reads are mq0, there is nothing left to do
    }

    // Emit one fraction per alternate allele, in allele order. The array is sized by the number
    // of alternate alleles rather than by the number of distinct first bases, so alleles that
    // share a first base cannot overflow it.
    final int altCount = vc.getAlternateAlleles().size();
    final int informativeDepth = totalDepth - mq0;
    final String[] fracs = new String[altCount];
    for (int i = 0; i < altCount; i++) {
      final byte altBase = vc.getAlternateAllele(i).getBases()[0];
      fracs[i] = String.format("%.3f", ((float) alleleCounts.get(altBase)) / informativeDepth);
    }

    map.put(getKeyNames().get(1), fracs);
    return map;
  }
Ejemplo n.º 6
0
  /**
   * Compares the alternate alleles of this VariantContext with those of another one, ignoring
   * their order.
   *
   * @param other VariantContext whose alternate alleles to compare against
   * @return true if both contexts have the same number of alternate alleles and every alternate
   *     allele of this context is also present in {@code other}; false otherwise
   */
  public boolean hasSameAlternateAllelesAs(VariantContext other) {
    final List<Allele> mine = getAlternateAlleles();
    final List<Allele> theirs = other.getAlternateAlleles();

    // equal sizes plus containment of every local allele means the same alleles in any order
    return mine.size() == theirs.size() && theirs.containsAll(mine);
  }
Ejemplo n.º 7
0
  /**
   * Updates the running site-level and genotype-level counters with one variant context.
   *
   * <p>The site is counted as a called locus, then as a reference locus when it is monomorphic in
   * the samples or otherwise under its variant type. Afterwards every genotype is tallied by its
   * type; homozygous genotypes additionally count as derived when an ancestral allele and an
   * alternate allele are available and the homozygous allele differs from the ancestral one.
   *
   * @param vc1 the variant context to tally
   * @param tracker not used by this method
   * @param ref not used by this method
   * @param context not used by this method
   * @return always null, since no interesting sites are captured
   * @throws ReviewedStingException if the context type or a genotype type is not handled
   */
  public String update1(
      VariantContext vc1,
      RefMetaDataTracker tracker,
      ReferenceContext ref,
      AlignmentContext context) {
    nCalledLoci++;

    // Note from Eric:
    // This is really not correct. What we really want here is a polymorphic vs. monomorphic count
    // (i.e. on the Genotypes). So, in order to maintain consistency with the previous
    // implementation (and the intention of the original author), a proxy check for monomorphic
    // status was added here.
    // This also protects against the case where vc has only no-calls, which can happen if we
    // stratify by sample and the sample has a single no-call.
    if (vc1.isMonomorphicInSamples()) {
      nRefLoci++;
    } else {
      switch (vc1.getType()) {
        case NO_VARIATION:
          // shouldn't get here
          break;
        case SNP:
          nVariantLoci++;
          nSNPs++;
          if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) {
            nSingletons++;
          }
          break;
        case MNP:
          nVariantLoci++;
          nMNPs++;
          if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) {
            nSingletons++;
          }
          break;
        case INDEL:
          nVariantLoci++;
          if (vc1.isSimpleInsertion()) {
            nInsertions++;
          } else if (vc1.isSimpleDeletion()) {
            nDeletions++;
          } else {
            nComplex++;
          }
          break;
        case MIXED:
          nVariantLoci++;
          nMixed++;
          break;
        case SYMBOLIC:
          // symbolic sites are tallied on their own and not added to nVariantLoci
          nSymbolic++;
          break;
        default:
          throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType());
      }
    }

    final String refStr = vc1.getReference().getBaseString().toUpperCase();

    // ancestral allele, upper-cased, or null when the site carries no such attribute;
    // a value of "." is not special-cased (an earlier mapping of "." to the reference is disabled)
    String ancestral = null;
    if (vc1.hasAttribute("ANCESTRALALLELE")) {
      ancestral = vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase();
    }

    // ref  aa  alt  class
    // A    C   A    der homozygote
    // A    C   C    anc homozygote

    // A    A   A    ref homozygote
    // A    A   C
    // A    C   A
    // A    C   C

    for (final Genotype g : vc1.getGenotypes()) {
      // first alternate allele, upper-cased, or null when the site has none
      String firstAlt = null;
      if (vc1.getAlternateAlleles().size() > 0) {
        firstAlt = vc1.getAlternateAllele(0).getBaseString().toUpperCase();
      }
      // derived status can only be judged with both an ancestral and an alternate allele
      final boolean canPolarize = ancestral != null && firstAlt != null;

      switch (g.getType()) {
        case NO_CALL:
          nNoCalls++;
          break;
        case HOM_REF:
          nHomRef++;
          // homozygous for a reference allele that is not the ancestral one
          if (canPolarize && !refStr.equalsIgnoreCase(ancestral)) {
            nHomDerived++;
          }
          break;
        case HET:
          nHets++;
          break;
        case HOM_VAR:
          nHomVar++;
          // homozygous for an alternate allele that is not the ancestral one
          if (canPolarize && !firstAlt.equalsIgnoreCase(ancestral)) {
            nHomDerived++;
          }
          break;
        case MIXED:
          // mixed genotypes are not counted
          break;
        default:
          throw new ReviewedStingException("BUG: Unexpected genotype type: " + g);
      }
    }

    return null; // we don't capture any interesting sites
  }
Ejemplo n.º 8
0
  /**
   * Writes the per-sample genotype columns of one record to {@code mWriter}.
   *
   * <p>For every sample listed in the header a field separator is written, followed by that
   * sample's values for the keys in {@code genotypeFormatKeys}. A sample without a genotype in
   * {@code vc} gets the empty-genotype placeholder. The GT value is written straight to the writer
   * while all other values are collected first, so that trailing missing values can be dropped
   * before they are written.
   *
   * @param vc the variant context
   * @param alleleMap alleles for this context; handed to {@code writeAllele} for each GT allele
   * @param genotypeFormatKeys the FORMAT keys to emit for each sample, in output order
   * @throws IOException if writing to the underlying writer fails
   * @throws ReviewedStingException if GT is requested but a sample's genotype is not available
   */
  private void addGenotypeData(
      VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
      throws IOException {

    for (String sample : mHeader.getGenotypeSamples()) {
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      Genotype g = vc.getGenotype(sample);
      if (g == null) {
        // the sample has no genotype in this record: emit the placeholder and move on
        // TODO -- The VariantContext needs to know what the general ploidy is of the samples
        // TODO -- We shouldn't be assuming diploid genotypes here!
        mWriter.write(VCFConstants.EMPTY_GENOTYPE);
        continue;
      }

      // values of all non-GT keys for this sample, in FORMAT order
      List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
      for (String key : genotypeFormatKeys) {

        if (key.equals(VCFConstants.GENOTYPE_KEY)) {
          if (!g.isAvailable()) {
            throw new ReviewedStingException(
                "GTs cannot be missing for some samples if they are available for others in the record");
          }

          // GT is written immediately (not buffered in attrs): alleles joined by the phasing
          // separator
          writeAllele(g.getAllele(0), alleleMap);
          for (int i = 1; i < g.getPloidy(); i++) {
            mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
            writeAllele(g.getAllele(i), alleleMap);
          }

          continue;
        }

        // default: the genotype's own attribute value, or the missing marker when it has none
        Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;

        // some exceptions: GQ and FT are derived from the genotype itself, not its attributes
        if (key.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
          // a quality equal (within 1e-6) to the no-error sentinel is reported as missing
          if (Math.abs(g.getNegLog10PError() - Genotype.NO_NEG_LOG_10PERROR) < 1e-6)
            val = VCFConstants.MISSING_VALUE_v4;
          else {
            // the reported quality is capped at MAX_GENOTYPE_QUAL
            val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
          }
        } else if (key.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
          // sorted, ';'-joined filters when filtered; otherwise the pass or unfiltered marker
          val =
              g.isFiltered()
                  ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters()))
                  : (g.filtersWereApplied()
                      ? VCFConstants.PASSES_FILTERS_v4
                      : VCFConstants.UNFILTERED);
        }

        VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
        if (metaData != null) {
          // number of values the header declares for this key, given the alternate-allele count
          int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
          if (numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4)) {
            // If we have a missing field but multiple values are expected, we need to construct a
            // new string with all fields.
            // For example, if Number=2, the string has to be ".,."
            StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
            for (int i = 1; i < numInFormatField; i++) {
              sb.append(",");
              sb.append(VCFConstants.MISSING_VALUE_v4);
            }
            val = sb.toString();
          }
        }

        // assume that if key is absent, then the given string encoding suffices
        String outputValue = formatVCFField(val);
        if (outputValue != null) attrs.add(outputValue);
      }

      // strip off trailing missing values
      for (int i = attrs.size() - 1; i >= 0; i--) {
        if (isMissingValue(attrs.get(i))) attrs.remove(i);
        else break;
      }

      // write the buffered values; the first one needs a leading separator only when GT was
      // already written for this sample
      for (int i = 0; i < attrs.size(); i++) {
        if (i > 0 || genotypeFormatKeys.contains(VCFConstants.GENOTYPE_KEY))
          mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
        mWriter.write(attrs.get(i));
      }
    }
  }
Ejemplo n.º 9
0
  /**
   * Add a record to the file.
   *
   * <p>The record is written as one line with the columns CHROM, POS, ID, REF, ALT, QUAL, FILTER
   * and INFO, followed by FORMAT and the per-sample columns when genotype data applies. The writer
   * is flushed after every record.
   *
   * @param vc the Variant Context object
   * @param refBase the ref base used for indels
   * @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE
   *     EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER
   *     THE EVENT INSTEAD)
   * @throws IllegalStateException if the VCF header has not been written yet
   * @throws RuntimeException if writing fails; the underlying IOException is attached as the cause
   */
  public void add(VariantContext vc, byte refBase, boolean refBaseShouldBeAppliedToEndOfAlleles) {
    if (mHeader == null) {
      throw new IllegalStateException(
          "The VCF Header must be written before records can be added: " + locationString());
    }

    // drop the genotypes up front when the writer is configured not to emit them
    if (doNotWriteGenotypes) {
      vc = VariantContext.modifyGenotypes(vc, null);
    }

    try {
      vc =
          VariantContext.createVariantContextWithPaddedAlleles(
              vc, refBase, refBaseShouldBeAppliedToEndOfAlleles);

      // if we are doing on the fly indexing, add the record ***before*** we write any bytes
      if (indexer != null) {
        indexer.addFeature(vc, positionalStream.getPosition());
      }

      // maps each allele to the index string used for it in the genotype columns
      Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
      alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup

      // CHROM
      mWriter.write(vc.getChr());
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // POS
      mWriter.write(String.valueOf(vc.getStart()));
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // ID
      String ID = vc.hasID() ? vc.getID() : VCFConstants.EMPTY_ID_FIELD;
      mWriter.write(ID);
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // REF (always allele index 0)
      alleleMap.put(vc.getReference(), "0");
      String refString = vc.getReference().getDisplayString();
      mWriter.write(refString);
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // ALT (comma-separated, allele indices start at 1)
      if (vc.isVariant()) {
        Allele altAllele = vc.getAlternateAllele(0);
        alleleMap.put(altAllele, "1");
        String alt = altAllele.getDisplayString();
        mWriter.write(alt);

        for (int i = 1; i < vc.getAlternateAlleles().size(); i++) {
          altAllele = vc.getAlternateAllele(i);
          alleleMap.put(altAllele, String.valueOf(i + 1));
          alt = altAllele.getDisplayString();
          mWriter.write(",");
          mWriter.write(alt);
        }
      } else {
        mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD);
      }
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // QUAL
      if (!vc.hasNegLog10PError()) {
        mWriter.write(VCFConstants.MISSING_VALUE_v4);
      } else {
        mWriter.write(getQualValue(vc.getPhredScaledQual()));
      }
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // FILTER: sorted, ';'-joined filters when filtered; otherwise the pass or unfiltered marker
      String filters =
          vc.isFiltered()
              ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters()))
              : (filtersWereAppliedToContext || vc.filtersWereApplied()
                  ? VCFConstants.PASSES_FILTERS_v4
                  : VCFConstants.UNFILTERED);
      mWriter.write(filters);
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // INFO: sorted by key; internal bookkeeping attributes are not written out
      Map<String, String> infoFields = new TreeMap<String, String>();
      for (Map.Entry<String, Object> field : vc.getAttributes().entrySet()) {
        String key = field.getKey();
        if (key.equals(VariantContext.ID_KEY)
            || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)
            || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY)
            || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY)) {
          continue;
        }

        String outputValue = formatVCFField(field.getValue());
        if (outputValue != null) {
          infoFields.put(key, outputValue);
        }
      }
      writeInfoString(infoFields);

      // FORMAT
      if (vc.hasAttribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY)) {
        // genotypes were never parsed: pass the stored text through unchanged
        mWriter.write(VCFConstants.FIELD_SEPARATOR);
        mWriter.write(vc.getAttributeAsString(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, ""));
      } else {
        List<String> genotypeAttributeKeys = new ArrayList<String>();
        if (vc.hasGenotypes()) {
          genotypeAttributeKeys.addAll(calcVCFGenotypeKeys(vc));
        } else if (mHeader.hasGenotypingData()) {
          // this needs to be done in case all samples are no-calls
          genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
        }

        if (!genotypeAttributeKeys.isEmpty()) {
          String genotypeFormatString =
              ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys);
          mWriter.write(VCFConstants.FIELD_SEPARATOR);
          mWriter.write(genotypeFormatString);

          addGenotypeData(vc, alleleMap, genotypeAttributeKeys);
        }
      }

      mWriter.write("\n");
      mWriter.flush(); // necessary so that writing to an output stream will work
    } catch (IOException e) {
      // keep the original IOException as the cause so the root failure is not lost
      throw new RuntimeException("Unable to write the VCF object to " + locationString(), e);
    }
  }
  /**
   * Returns a version of the given context in which the leading base shared by all alleles has
   * been removed, or the context itself when no trimming applies.
   *
   * <p>Trimming is attempted only for variant contexts with a non-null reference allele for which
   * {@code AbstractVCFCodec.computeForwardClipping} reports a positive value. The first base is
   * then removed from every non-symbolic allele. If none of the resulting alleles is a null
   * allele (e.g. mixed records such as {TA*,TGA,TG}), the original representation is kept.
   * Otherwise a new context is built with the trimmed alleles, genotypes remapped to them, and the
   * first base of the original reference recorded as the reference base for indels.
   *
   * @param inputVC the context to trim
   * @return the trimmed context, or {@code inputVC} unchanged when nothing is trimmed
   */
  public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
    final Allele refAllele = inputVC.getReference();

    // non-variant contexts and contexts with a null reference allele are never trimmed
    if (!inputVC.isVariant() || refAllele.isNull()) {
      return inputVC;
    }

    // we only trim if a reference base is common to all alleles
    final boolean sharesLeadingBase =
        AbstractVCFCodec.computeForwardClipping(
                new ArrayList<Allele>(inputVC.getAlternateAlleles()),
                inputVC.getReference().getDisplayString())
            > 0;
    if (!sharesLeadingBase) {
      return inputVC;
    }

    final List<Allele> alleles = new ArrayList<Allele>();
    final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
    boolean hasNullAlleles = false;

    for (Allele a : inputVC.getAlleles()) {
      final Allele trimmedAllele;
      if (a.isSymbolic()) {
        // symbolic alleles are kept as they are
        trimmedAllele = a;
      } else {
        // get bases for current allele and create a new one without the first base
        final byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
        trimmedAllele = Allele.create(newBases, a.isReference());
      }
      alleles.add(trimmedAllele);
      originalToTrimmedAlleleMap.put(a, trimmedAllele);
      if (trimmedAllele.isNull()) {
        hasNullAlleles = true;
      }
    }

    // detect case where we're trimming bases but resulting vc doesn't have any null allele. In
    // that case, we keep original representation
    // example: mixed records such as {TA*,TGA,TG}
    if (!hasNullAlleles) {
      return inputVC;
    }

    // now we can recreate new genotypes with trimmed alleles
    final GenotypesContext genotypes = GenotypesContext.create();
    for (final Genotype genotype : inputVC.getGenotypes()) {
      final List<Allele> trimmedAlleles = new ArrayList<Allele>();
      for (Allele a : genotype.getAlleles()) {
        // called alleles map to their trimmed counterpart; everything else becomes a no-call
        trimmedAlleles.add(a.isCalled() ? originalToTrimmedAlleleMap.get(a) : Allele.NO_CALL);
      }
      genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
    }

    // copy of the input attributes for the new context
    final Map<String, Object> attributes = new TreeMap<String, Object>(inputVC.getAttributes());

    // the base that was trimmed off the reference is kept as the reference base for indels
    final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
    return builder
        .alleles(alleles)
        .genotypes(genotypes)
        .attributes(attributes)
        .referenceBaseForIndel(Byte.valueOf(inputVC.getReference().getBases()[0]))
        .make();
  }
Ejemplo n.º 11
0
  /**
   * Derives a list of haplotypes from the reads of a pileup.
   *
   * <p>Each pileup element is turned into a haplotype via {@code getHaplotypeFromRead}. These
   * candidates are then merged into a bounded set of consensus haplotypes, and the result list is
   * filled from the head of that set: one entry with quality 60 followed by one entry with quality
   * 20 per alternate allele of {@code vc}. When the consensus set runs out, the first haplotype is
   * reused for the remaining entries.
   *
   * @param pileup the reads to build haplotypes from
   * @param contextSize passed through to {@code getHaplotypeFromRead}
   * @param locus passed through to {@code getHaplotypeFromRead}
   * @param vc the variant context; its alternate-allele count plus one is the size of the result
   * @return the haplotype list, or null if no consensus haplotype could be formed
   */
  private List<Haplotype> computeHaplotypes(
      final ReadBackedPileup pileup,
      final int contextSize,
      final int locus,
      final VariantContext vc) {
    // one haplotype for the reference plus one per alternate allele
    final int haplotypesToCompute = vc.getAlternateAlleles().size() + 1;

    final PriorityQueue<Haplotype> candidates =
        new PriorityQueue<Haplotype>(100, new HaplotypeComparator());
    final PriorityQueue<Haplotype> consensus =
        new PriorityQueue<Haplotype>(
            MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER, new HaplotypeComparator());

    // seed the candidate queue with one haplotype per read in the pileup
    for (final PileupElement read : pileup) {
      candidates.add(getHaplotypeFromRead(read, contextSize, locus));
    }

    // Drain the candidates, folding each one into the consensus set to find possible segregating
    // haplotypes
    for (Haplotype candidate = candidates.poll();
        candidate != null;
        candidate = candidates.poll()) {
      boolean merged = false;
      Haplotype lastUnmatched = null;

      for (final Haplotype existing : consensus) {
        final Haplotype combined = getConsensusHaplotype(candidate, existing);
        if (combined == null) {
          // no consensus with this entry; remember it and keep looking
          lastUnmatched = existing;
          continue;
        }

        merged = true;
        // keep the combined haplotype only if it improves on the entry it was merged with
        if (combined.getQualitySum() > existing.getQualitySum()) {
          consensus.remove(existing);
          consensus.add(combined);
        }
        // stop right after the first match; the queue may have just been modified
        break;
      }

      if (merged) {
        continue;
      }

      if (consensus.size() < MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER) {
        // still room: the unmatched candidate becomes a consensus haplotype of its own
        consensus.add(candidate);
      } else if (lastUnmatched != null
          && candidate.getQualitySum() > lastUnmatched.getQualitySum()) {
        // full: the candidate displaces the last entry it was compared with if it scores higher
        consensus.remove(lastUnmatched);
        consensus.add(candidate);
      }
    }

    if (consensus.isEmpty()) {
      return null;
    }

    // Retrieve the haplotypes from the front of the consensus queue.
    // NOTE(review): which entries come first depends on HaplotypeComparator -- presumably the
    // best-quality ones; confirm.
    final Haplotype first = consensus.poll();

    final List<Haplotype> result = new ArrayList<Haplotype>();
    result.add(new Haplotype(first.getBases(), 60));

    for (int k = 1; k < haplotypesToCompute; k++) {
      final Haplotype next = consensus.poll();
      // Sometimes only the reference haplotype can be found; reuse the first one in that case
      final Haplotype chosen = (next == null) ? first : next;
      result.add(new Haplotype(chosen.getBases(), 20));
    }
    return result;
  }
Ejemplo n.º 12
0
  /**
   * Main entry function to calculate genotypes of a given VC with corresponding GL's
   *
   * @param tracker Tracker
   * @param refContext Reference context
   * @param rawContext Raw context
   * @param stratifiedContexts Stratified alignment contexts
   * @param vc Input VC
   * @param model GL calculation model
   * @param inheritAttributesFromInputVC Output VC will contain attributes inherited from input vc
   * @return VC with assigned genotypes
   */
  public VariantCallContext calculateGenotypes(
      final RefMetaDataTracker tracker,
      final ReferenceContext refContext,
      final AlignmentContext rawContext,
      Map<String, AlignmentContext> stratifiedContexts,
      final VariantContext vc,
      final GenotypeLikelihoodsCalculationModel.Model model,
      final boolean inheritAttributesFromInputVC,
      final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap>
          perReadAlleleLikelihoodMap) {

    boolean limitedContext =
        tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;

    // initialize the data for this thread if that hasn't been done yet
    if (afcm.get() == null) {
      afcm.set(AFCalcFactory.createAFCalc(UAC, N, logger));
    }

    // estimate our confidence in a reference call and return
    if (vc.getNSamples() == 0) {
      if (limitedContext) return null;
      return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES
          ? estimateReferenceConfidence(vc, stratifiedContexts, getTheta(model), false, 1.0)
          : generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
    }

    AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));

    // is the most likely frequency conformation AC=0 for all alternate alleles?
    boolean bestGuessIsRef = true;

    // determine which alternate alleles have AF>0
    final List<Allele> myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
    final List<Integer> alleleCountsofMLE = new ArrayList<Integer>(vc.getAlleles().size());
    myAlleles.add(vc.getReference());
    for (int i = 0; i < AFresult.getAllelesUsedInGenotyping().size(); i++) {
      final Allele alternateAllele = AFresult.getAllelesUsedInGenotyping().get(i);
      if (alternateAllele.isReference()) continue;

      // we are non-ref if the probability of being non-ref > the emit confidence.
      // the emit confidence is phred-scaled, say 30 => 10^-3.
      // the posterior AF > 0 is log10: -5 => 10^-5
      // we are non-ref if 10^-5 < 10^-3 => -5 < -3
      final boolean isNonRef =
          AFresult.isPolymorphic(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING / -10.0);

      // if the most likely AC is not 0, then this is a good alternate allele to use
      if (isNonRef) {
        myAlleles.add(alternateAllele);
        alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
        bestGuessIsRef = false;
      }
      // if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
      else if (UAC.GenotypingMode
          == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
        myAlleles.add(alternateAllele);
        alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
      }
    }

    final double PoFGT0 = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0());

    // note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice
    final double phredScaledConfidence =
        Math.abs(
            !bestGuessIsRef
                    || UAC.GenotypingMode
                        == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE
                            .GENOTYPE_GIVEN_ALLELES
                ? -10 * AFresult.getLog10PosteriorOfAFEq0()
                : -10 * AFresult.getLog10PosteriorOfAFGT0());

    // return a null call if we don't pass the confidence cutoff or the most likely allele frequency
    // is zero
    if (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES
        && !passesEmitThreshold(phredScaledConfidence, bestGuessIsRef)) {
      // technically, at this point our confidence in a reference call isn't accurately estimated
      //  because it didn't take into account samples with no data, so let's get a better estimate
      return limitedContext
          ? null
          : estimateReferenceConfidence(vc, stratifiedContexts, getTheta(model), true, PoFGT0);
    }

    // start constructing the resulting VC
    final GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
    final VariantContextBuilder builder =
        new VariantContextBuilder(
            "UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
    builder.log10PError(phredScaledConfidence / -10.0);
    if (!passesCallThreshold(phredScaledConfidence)) builder.filters(filter);

    // create the genotypes
    final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true, ploidy);
    builder.genotypes(genotypes);

    // print out stats if we have a writer
    if (verboseWriter != null && !limitedContext)
      printVerboseData(refContext.getLocus().toString(), vc, PoFGT0, phredScaledConfidence, model);

    // *** note that calculating strand bias involves overwriting data structures, so we do that
    // last
    final HashMap<String, Object> attributes = new HashMap<String, Object>();

    // inherit attributed from input vc if requested
    if (inheritAttributesFromInputVC) attributes.putAll(vc.getAttributes());
    // if the site was downsampled, record that fact
    if (!limitedContext && rawContext.hasPileupBeenDownsampled())
      attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);

    if (UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED)
      attributes.put(NUMBER_OF_DISCOVERED_ALLELES_KEY, vc.getAlternateAlleles().size());

    // add the MLE AC and AF annotations
    if (alleleCountsofMLE.size() > 0) {
      attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE);
      final int AN = builder.make().getCalledChrCount();
      final ArrayList<Double> MLEfrequencies = new ArrayList<Double>(alleleCountsofMLE.size());
      // the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT
      // is ./. but the exact model may arbitrarily choose an AC>1)
      for (int AC : alleleCountsofMLE) MLEfrequencies.add(Math.min(1.0, (double) AC / (double) AN));
      attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
    }

    if (UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef) {
      // final boolean DEBUG_SLOD = false;

      // the overall lod
      // double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
      double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
      // if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);

      List<Allele> allAllelesToUse = builder.make().getAlleles();

      // the forward lod
      VariantContext vcForward =
          calculateLikelihoods(
              tracker,
              refContext,
              stratifiedContexts,
              AlignmentContextUtils.ReadOrientation.FORWARD,
              allAllelesToUse,
              false,
              model,
              perReadAlleleLikelihoodMap);
      AFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model));
      // double[] normalizedLog10Posteriors =
      // MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
      double forwardLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0();
      double forwardLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
      // if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ",
      // forwardLog10PofF=" + forwardLog10PofF);

      // the reverse lod
      VariantContext vcReverse =
          calculateLikelihoods(
              tracker,
              refContext,
              stratifiedContexts,
              AlignmentContextUtils.ReadOrientation.REVERSE,
              allAllelesToUse,
              false,
              model,
              perReadAlleleLikelihoodMap);
      AFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model));
      // normalizedLog10Posteriors =
      // MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
      double reverseLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0();
      double reverseLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
      // if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ",
      // reverseLog10PofF=" + reverseLog10PofF);

      double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
      double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
      // if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" +
      // reverseLod);

      // strand score is max bias between forward and reverse strands
      double strandScore = Math.max(forwardLod, reverseLod);
      // rescale by a factor of 10
      strandScore *= 10.0;
      // logger.debug(String.format("SLOD=%f", strandScore));

      if (!Double.isNaN(strandScore)) attributes.put("SB", strandScore);
    }

    // finish constructing the resulting VC
    builder.attributes(attributes);
    VariantContext vcCall = builder.make();

    // if we are subsetting alleles (either because there were too many or because some were not
    // polymorphic)
    // then we may need to trim the alleles (because the original VariantContext may have had to pad
    // at the end).
    if (myAlleles.size() != vc.getAlleles().size()
        && !limitedContext) // limitedContext callers need to handle allele trimming on their own to
                            // keep their perReadAlleleLikelihoodMap alleles in sync
    vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);

    if (annotationEngine != null
        && !limitedContext) { // limitedContext callers need to handle annotations on their own by
                              // calling their own annotationEngine
      // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
      final ReadBackedPileup pileup = rawContext.getBasePileup();
      stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);

      vcCall =
          annotationEngine.annotateContext(
              tracker, refContext, stratifiedContexts, vcCall, perReadAlleleLikelihoodMap);
    }

    return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PoFGT0));
  }
  /**
   * Builds the indel annotation values from the extended event pileup of the given context.
   *
   * <p>The returned map always carries the total pileup depth under {@code getKeyNames().get(0)}.
   * When the pileup is non-empty and at least one element has a non-zero mapping quality, it also
   * carries, under {@code getKeyNames().get(1)}, one formatted fraction per alternate allele of
   * {@code vc} (in alternate-allele order): the number of pileup elements counted for that allele
   * divided by the number of elements whose mapping quality is non-zero.
   *
   * @param stratifiedContext the alignment context whose extended event pileup is inspected
   * @param vc the variant context supplying the reference and alternate alleles
   * @return the annotation map, or {@code null} if the context has no extended event pileup
   */
  private Map<String, Object> annotateIndel(AlignmentContext stratifiedContext, VariantContext vc) {

    if (!stratifiedContext.hasExtendedEventPileup()) {
      return null;
    }

    final ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup();
    if (pileup == null) {
      return null;
    }
    final int totalDepth = pileup.size();

    final Map<String, Object> map = new HashMap<String, Object>();
    map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

    if (totalDepth == 0) {
      return map;
    }

    // number of pileup elements with mapping quality 0; they are excluded from every count
    int mq0 = 0;

    // one counter per distinct alternate-allele representation; no-call alleles get no counter
    final Map<String, Integer> alleleCounts = new HashMap<String, Integer>();
    final Allele refAllele = vc.getReference();

    for (Allele allele : vc.getAlternateAlleles()) {
      if (allele.isNoCall()) {
        continue; // this does not look so good, should we die???
      }
      alleleCounts.put(getAlleleRepresentation(allele), 0);
    }

    for (ExtendedEventPileupElement e : pileup.toExtendedIterable()) {

      if (e.getMappingQual() == 0) {
        mq0++;
        continue;
      }

      // work out which counter (if any) this element contributes to
      String eventKey = null;
      if (e.isInsertion()) {
        eventKey = e.getEventBases();
      } else if (e.isDeletion() && e.getEventLength() == refAllele.length()) {
        // this is indeed the deletion allele recorded in VC; deletions of any other length are
        // ignored
        eventKey = DEL;
      }

      // only events matching one of the alternate alleles are counted
      if (eventKey != null && alleleCounts.containsKey(eventKey)) {
        alleleCounts.put(eventKey, alleleCounts.get(eventKey) + 1);
      }
    }

    // every element was mapping-quality 0: no usable depth, so no fractions can be reported
    if (mq0 == totalDepth) {
      return map;
    }

    final int usableDepth = totalDepth - mq0;

    // Size the output by the number of alternate alleles (the index space used below), not by the
    // number of counters: the counter map is smaller whenever an allele was skipped as a no-call
    // or two alleles share a representation.
    final int numAltAlleles = vc.getAlternateAlleles().size();
    final String[] fracs = new String[numAltAlleles];
    for (int i = 0; i < numAltAlleles; i++) {
      final Integer count = alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i)));
      // an allele without a counter had no reads tallied for it; report it as zero support
      final float supporting = (count == null) ? 0f : (float) count.intValue();
      fracs[i] = String.format("%.3f", supporting / usableDepth);
    }

    map.put(getKeyNames().get(1), fracs);

    return map;
  }