private VariantCallContext estimateReferenceConfidence( VariantContext vc, Map<String, AlignmentContext> contexts, double theta, boolean ignoreCoveredSamples, double initialPofRef) { if (contexts == null) return null; double P_of_ref = initialPofRef; // for each sample that we haven't examined yet for (String sample : samples) { boolean isCovered = contexts.containsKey(sample); if (ignoreCoveredSamples && isCovered) continue; int depth = 0; if (isCovered) { depth = contexts.get(sample).getBasePileup().depthOfCoverage(); } P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth); } return new VariantCallContext( vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false); }
/**
 * Instantiates every GenotypeLikelihoodsCalculationModel plugin reported by the PluginManager and
 * indexes the instances by model name.
 *
 * <p>The key of each entry is the plugin's simple class name with the
 * "GenotypeLikelihoodsCalculationModel" text removed, upper-cased with {@link
 * java.util.Locale#ROOT} so that the keys do not depend on the JVM's default locale.
 *
 * @param logger logger passed to each model's (UnifiedArgumentCollection, Logger) constructor
 * @param UAC argument collection passed to each model's constructor; its GLmodel value is quoted
 *     in the error message
 * @return a map from upper-cased model name to the instantiated model
 * @throws UserException if any plugin class lacks the expected constructor or cannot be
 *     instantiated
 */
private static Map<String, GenotypeLikelihoodsCalculationModel>
    getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
  final Map<String, GenotypeLikelihoodsCalculationModel> glcm =
      new HashMap<String, GenotypeLikelihoodsCalculationModel>();
  final List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses =
      new PluginManager<GenotypeLikelihoodsCalculationModel>(
              GenotypeLikelihoodsCalculationModel.class)
          .getPlugins();

  for (final Class<? extends GenotypeLikelihoodsCalculationModel> glmClass : glmClasses) {
    // Literal replacement (no regex needed) and locale-independent upper-casing.
    final String key =
        glmClass
            .getSimpleName()
            .replace("GenotypeLikelihoodsCalculationModel", "")
            .toUpperCase(java.util.Locale.ROOT);
    try {
      final Constructor<? extends GenotypeLikelihoodsCalculationModel> c =
          glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);
      glcm.put(key, c.newInstance(UAC, logger));
    } catch (Exception e) {
      // Reflection wraps exceptions thrown by the constructor itself in an
      // InvocationTargetException whose own message is usually null; report the real failure.
      final Throwable failure =
          (e instanceof java.lang.reflect.InvocationTargetException && e.getCause() != null)
              ? e.getCause()
              : e;
      final String detail =
          failure.getMessage() != null ? failure.getMessage() : failure.toString();
      throw new UserException(
          "The likelihoods model provided for the -glm argument ("
              + UAC.GLmodel
              + ") is not a valid option: "
              + detail);
    }
  }

  return glcm;
}
/** * Read in a list of ExactCall objects from reader, keeping only those with starts in startsToKeep * or all sites (if this is empty) * * @param reader a just-opened reader sitting at the start of the file * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should * be kept * @param parser a genome loc parser to create genome locs * @return a list of ExactCall objects in reader * @throws IOException */ public static List<ExactCall> readExactLog( final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser) throws IOException { if (reader == null) throw new IllegalArgumentException("reader cannot be null"); if (startsToKeep == null) throw new IllegalArgumentException("startsToKeep cannot be null"); if (parser == null) throw new IllegalArgumentException("GenomeLocParser cannot be null"); List<ExactCall> calls = new LinkedList<ExactCall>(); // skip the header line reader.readLine(); // skip the first "type" line reader.readLine(); while (true) { final VariantContextBuilder builder = new VariantContextBuilder(); final List<Allele> alleles = new ArrayList<Allele>(); final List<Genotype> genotypes = new ArrayList<Genotype>(); final double[] posteriors = new double[2]; final double[] priors = MathUtils.normalizeFromLog10(new double[] {0.5, 0.5}, true); final List<Integer> mle = new ArrayList<Integer>(); final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(); long runtimeNano = -1; GenomeLoc currentLoc = null; while (true) { final String line = reader.readLine(); if (line == null) return calls; final String[] parts = line.split("\t"); final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]); final String variable = parts[1]; final String key = parts[2]; final String value = parts[3]; if (currentLoc == null) currentLoc = lineLoc; if (variable.equals("type")) { if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) { builder.alleles(alleles); final int stop = 
currentLoc.getStart() + alleles.get(0).length() - 1; builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); builder.genotypes(genotypes); final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[] {})); final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele); calls.add(new ExactCall(builder.make(), runtimeNano, result)); } break; } else if (variable.equals("allele")) { final boolean isRef = key.equals("0"); alleles.add(Allele.create(value, isRef)); } else if (variable.equals("PL")) { final GenotypeBuilder gb = new GenotypeBuilder(key); gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs()); genotypes.add(gb.make()); } else if (variable.equals("log10PosteriorOfAFEq0")) { posteriors[0] = Double.valueOf(value); } else if (variable.equals("log10PosteriorOfAFGt0")) { posteriors[1] = Double.valueOf(value); } else if (variable.equals("MLE")) { mle.add(Integer.valueOf(value)); } else if (variable.equals("pNonRefByAllele")) { final Allele a = Allele.create(key); log10pNonRefByAllele.put(a, Double.valueOf(value)); } else if (variable.equals("runtime.nano")) { runtimeNano = Long.valueOf(value); } else { // nothing to do } } } }
/**
 * Compute full calls at a given locus. Entry point for engine calls from the UnifiedGenotyper.
 *
 * <p>One result is produced per genotype likelihoods model selected by getGLModelsToUse, except
 * that a model whose likelihood calculation yields no VariantContext contributes nothing. When no
 * model applies, or when a model has no stratified contexts, the entry added is an empty context
 * if the output mode is EMIT_ALL_SITES and the genotyping mode is GENOTYPE_GIVEN_ALLELES, and
 * null otherwise. The returned list may therefore contain null entries.
 *
 * <p>NOTE(review): allSamples is not referenced anywhere in this method's body, so any guarantee
 * that the output contains a genotype for every sample in allSamples is not enforced here --
 * confirm against the callers.
 *
 * @param tracker the meta data tracker
 * @param refContext the reference base
 * @param rawContext contextual information around the locus
 * @param allSamples set of all sample names that we might call (i.e., those in the VCF header)
 * @return the list of VariantCallContext objects, possibly containing null entries
 */
public List<VariantCallContext> calculateLikelihoodsAndGenotypes(
    final RefMetaDataTracker tracker,
    final ReferenceContext refContext,
    final AlignmentContext rawContext,
    final Set<String> allSamples) {
  final List<VariantCallContext> results = new ArrayList<VariantCallContext>(2);
  final List<GenotypeLikelihoodsCalculationModel.Model> models =
      getGLModelsToUse(tracker, refContext, rawContext);
  final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap>
      perReadAlleleLikelihoodMap =
          new HashMap<
              String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap>();

  // No applicable model: emit a single placeholder entry and stop.
  if (models.isEmpty()) {
    if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES
        && UAC.GenotypingMode
            == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
      results.add(generateEmptyContext(tracker, refContext, null, rawContext));
    } else {
      results.add(null);
    }
    return results;
  }

  for (final GenotypeLikelihoodsCalculationModel.Model model : models) {
    // the per-read likelihood map is reused across models, so wipe it first
    perReadAlleleLikelihoodMap.clear();

    final Map<String, AlignmentContext> stratifiedContexts =
        getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);

    if (stratifiedContexts == null) {
      if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES
          && UAC.GenotypingMode
              == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
        results.add(generateEmptyContext(tracker, refContext, null, rawContext));
      } else {
        results.add(null);
      }
      continue;
    }

    final VariantContext vc =
        calculateLikelihoods(
            tracker,
            refContext,
            stratifiedContexts,
            AlignmentContextUtils.ReadOrientation.COMPLETE,
            null,
            true,
            model,
            perReadAlleleLikelihoodMap);
    if (vc == null) {
      continue;
    }

    results.add(
        calculateGenotypes(
            tracker,
            refContext,
            rawContext,
            stratifiedContexts,
            vc,
            model,
            true,
            perReadAlleleLikelihoodMap));
  }

  return results;
}