private VariantCallContext estimateReferenceConfidence(
      VariantContext vc,
      Map<String, AlignmentContext> contexts,
      double theta,
      boolean ignoreCoveredSamples,
      double initialPofRef) {
    if (contexts == null) return null;

    double P_of_ref = initialPofRef;

    // for each sample that we haven't examined yet
    for (String sample : samples) {
      boolean isCovered = contexts.containsKey(sample);
      if (ignoreCoveredSamples && isCovered) continue;

      int depth = 0;

      if (isCovered) {
        depth = contexts.get(sample).getBasePileup().depthOfCoverage();
      }

      P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
    }

    return new VariantCallContext(
        vc,
        QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING,
        false);
  }
  private static Map<String, GenotypeLikelihoodsCalculationModel>
      getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {

    final Map<String, GenotypeLikelihoodsCalculationModel> glcm =
        new HashMap<String, GenotypeLikelihoodsCalculationModel>();
    final List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses =
        new PluginManager<GenotypeLikelihoodsCalculationModel>(
                GenotypeLikelihoodsCalculationModel.class)
            .getPlugins();

    for (int i = 0; i < glmClasses.size(); i++) {
      final Class<? extends GenotypeLikelihoodsCalculationModel> glmClass = glmClasses.get(i);
      final String key =
          glmClass
              .getSimpleName()
              .replaceAll("GenotypeLikelihoodsCalculationModel", "")
              .toUpperCase();
      try {
        final Object args[] = new Object[] {UAC, logger};
        final Constructor c =
            glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);
        glcm.put(key, (GenotypeLikelihoodsCalculationModel) c.newInstance(args));
      } catch (Exception e) {
        throw new UserException(
            "The likelihoods model provided for the -glm argument ("
                + UAC.GLmodel
                + ") is not a valid option: "
                + e.getMessage());
      }
    }

    return glcm;
  }
Example #3
0
  /**
   * Read in a list of ExactCall objects from reader, keeping only those with starts in startsToKeep
   * or all sites (if this is empty)
   *
   * @param reader a just-opened reader sitting at the start of the file
   * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should
   *     be kept
   * @param parser a genome loc parser to create genome locs
   * @return a list of ExactCall objects in reader
   * @throws IOException
   */
  public static List<ExactCall> readExactLog(
      final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser)
      throws IOException {
    if (reader == null) throw new IllegalArgumentException("reader cannot be null");
    if (startsToKeep == null) throw new IllegalArgumentException("startsToKeep cannot be null");
    if (parser == null) throw new IllegalArgumentException("GenomeLocParser cannot be null");

    List<ExactCall> calls = new LinkedList<ExactCall>();

    // skip the header line
    reader.readLine();

    // skip the first "type" line
    reader.readLine();

    while (true) {
      final VariantContextBuilder builder = new VariantContextBuilder();
      final List<Allele> alleles = new ArrayList<Allele>();
      final List<Genotype> genotypes = new ArrayList<Genotype>();
      final double[] posteriors = new double[2];
      final double[] priors = MathUtils.normalizeFromLog10(new double[] {0.5, 0.5}, true);
      final List<Integer> mle = new ArrayList<Integer>();
      final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
      long runtimeNano = -1;

      GenomeLoc currentLoc = null;
      while (true) {
        final String line = reader.readLine();
        if (line == null) return calls;

        final String[] parts = line.split("\t");
        final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]);
        final String variable = parts[1];
        final String key = parts[2];
        final String value = parts[3];

        if (currentLoc == null) currentLoc = lineLoc;

        if (variable.equals("type")) {
          if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) {
            builder.alleles(alleles);
            final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
            builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
            builder.genotypes(genotypes);
            final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[] {}));
            final AFCalcResult result =
                new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
            calls.add(new ExactCall(builder.make(), runtimeNano, result));
          }
          break;
        } else if (variable.equals("allele")) {
          final boolean isRef = key.equals("0");
          alleles.add(Allele.create(value, isRef));
        } else if (variable.equals("PL")) {
          final GenotypeBuilder gb = new GenotypeBuilder(key);
          gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
          genotypes.add(gb.make());
        } else if (variable.equals("log10PosteriorOfAFEq0")) {
          posteriors[0] = Double.valueOf(value);
        } else if (variable.equals("log10PosteriorOfAFGt0")) {
          posteriors[1] = Double.valueOf(value);
        } else if (variable.equals("MLE")) {
          mle.add(Integer.valueOf(value));
        } else if (variable.equals("pNonRefByAllele")) {
          final Allele a = Allele.create(key);
          log10pNonRefByAllele.put(a, Double.valueOf(value));
        } else if (variable.equals("runtime.nano")) {
          runtimeNano = Long.valueOf(value);
        } else {
          // nothing to do
        }
      }
    }
  }
  /**
   * Compute full calls at a given locus. Entry point for engine calls from the UnifiedGenotyper.
   *
   * <p>If allSamples != null, then the output variantCallContext is guarenteed to contain a
   * genotype for every sample in allSamples. If it's null there's no such guarentee. Providing this
   * argument is critical when the resulting calls will be written to a VCF file.
   *
   * @param tracker the meta data tracker
   * @param refContext the reference base
   * @param rawContext contextual information around the locus
   * @param allSamples set of all sample names that we might call (i.e., those in the VCF header)
   * @return the VariantCallContext object
   */
  public List<VariantCallContext> calculateLikelihoodsAndGenotypes(
      final RefMetaDataTracker tracker,
      final ReferenceContext refContext,
      final AlignmentContext rawContext,
      final Set<String> allSamples) {
    final List<VariantCallContext> results = new ArrayList<VariantCallContext>(2);

    final List<GenotypeLikelihoodsCalculationModel.Model> models =
        getGLModelsToUse(tracker, refContext, rawContext);

    final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap>
        perReadAlleleLikelihoodMap =
            new HashMap<
                String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap>();

    if (models.isEmpty()) {
      results.add(
          UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES
                  && UAC.GenotypingMode
                      == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
              ? generateEmptyContext(tracker, refContext, null, rawContext)
              : null);
    } else {
      for (final GenotypeLikelihoodsCalculationModel.Model model : models) {
        perReadAlleleLikelihoodMap.clear();
        final Map<String, AlignmentContext> stratifiedContexts =
            getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
        if (stratifiedContexts == null) {
          results.add(
              UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES
                      && UAC.GenotypingMode
                          == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE
                              .GENOTYPE_GIVEN_ALLELES
                  ? generateEmptyContext(tracker, refContext, null, rawContext)
                  : null);
        } else {
          final VariantContext vc =
              calculateLikelihoods(
                  tracker,
                  refContext,
                  stratifiedContexts,
                  AlignmentContextUtils.ReadOrientation.COMPLETE,
                  null,
                  true,
                  model,
                  perReadAlleleLikelihoodMap);
          if (vc != null)
            results.add(
                calculateGenotypes(
                    tracker,
                    refContext,
                    rawContext,
                    stratifiedContexts,
                    vc,
                    model,
                    true,
                    perReadAlleleLikelihoodMap));
        }
      }
    }

    return results;
  }