/** * Compute GLs at a given locus. Entry point for engine calls from UGCalcLikelihoods. * * @param tracker the meta data tracker * @param refContext the reference base * @param rawContext contextual information around the locus * @param perReadAlleleLikelihoodMap Map to store per-sample, per-read, per-allele likelihoods * (only used for indels) * @return the VariantContext object */ public VariantContext calculateLikelihoods( final RefMetaDataTracker tracker, final ReferenceContext refContext, final AlignmentContext rawContext, final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) { final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext); if (models.isEmpty()) { return null; } for (final GenotypeLikelihoodsCalculationModel.Model model : models) { final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model); // return the first valid one we encounter if (stratifiedContexts != null) return calculateLikelihoods( tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap); } return null; }
/** * Compute genotypes at a given locus. Entry point for engine calls from UGCallVariants. * * @param tracker the meta data tracker * @param refContext the reference base * @param rawContext contextual information around the locus * @param vc the GL-annotated variant context * @return the VariantCallContext object */ public VariantCallContext calculateGenotypes( final RefMetaDataTracker tracker, final ReferenceContext refContext, final AlignmentContext rawContext, final VariantContext vc) { final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext); if (models.isEmpty()) { return null; } // return the first one final GenotypeLikelihoodsCalculationModel.Model model = models.get(0); final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model); return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, null); }
/** * Read in a list of ExactCall objects from reader, keeping only those with starts in startsToKeep * or all sites (if this is empty) * * @param reader a just-opened reader sitting at the start of the file * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should * be kept * @param parser a genome loc parser to create genome locs * @return a list of ExactCall objects in reader * @throws IOException */ public static List<ExactCall> readExactLog( final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser) throws IOException { if (reader == null) throw new IllegalArgumentException("reader cannot be null"); if (startsToKeep == null) throw new IllegalArgumentException("startsToKeep cannot be null"); if (parser == null) throw new IllegalArgumentException("GenomeLocParser cannot be null"); List<ExactCall> calls = new LinkedList<ExactCall>(); // skip the header line reader.readLine(); // skip the first "type" line reader.readLine(); while (true) { final VariantContextBuilder builder = new VariantContextBuilder(); final List<Allele> alleles = new ArrayList<Allele>(); final List<Genotype> genotypes = new ArrayList<Genotype>(); final double[] posteriors = new double[2]; final double[] priors = MathUtils.normalizeFromLog10(new double[] {0.5, 0.5}, true); final List<Integer> mle = new ArrayList<Integer>(); final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(); long runtimeNano = -1; GenomeLoc currentLoc = null; while (true) { final String line = reader.readLine(); if (line == null) return calls; final String[] parts = line.split("\t"); final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]); final String variable = parts[1]; final String key = parts[2]; final String value = parts[3]; if (currentLoc == null) currentLoc = lineLoc; if (variable.equals("type")) { if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) { builder.alleles(alleles); final int stop = currentLoc.getStart() + alleles.get(0).length() - 1; builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop); builder.genotypes(genotypes); final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[] {})); final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele); calls.add(new ExactCall(builder.make(), runtimeNano, result)); } break; } else if (variable.equals("allele")) { final boolean isRef = key.equals("0"); alleles.add(Allele.create(value, isRef)); } else if (variable.equals("PL")) { final GenotypeBuilder gb = new GenotypeBuilder(key); gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs()); genotypes.add(gb.make()); } else if (variable.equals("log10PosteriorOfAFEq0")) { posteriors[0] = Double.valueOf(value); } else if (variable.equals("log10PosteriorOfAFGt0")) { posteriors[1] = Double.valueOf(value); } else if (variable.equals("MLE")) { mle.add(Integer.valueOf(value)); } else if (variable.equals("pNonRefByAllele")) { final Allele a = Allele.create(key); log10pNonRefByAllele.put(a, Double.valueOf(value)); } else if (variable.equals("runtime.nano")) { runtimeNano = Long.valueOf(value); } else { // nothing to do } } } }
static VariantContext reallyMergeIntoMNP( VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) { int startInter = vc1.getEnd() + 1; int endInter = vc2.getStart() - 1; byte[] intermediateBases = null; if (startInter <= endInter) { intermediateBases = referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases(); StringUtil.toUpperCase(intermediateBases); } MergedAllelesData mergeData = new MergedAllelesData( intermediateBases, vc1, vc2); // ensures that the reference allele is added GenotypesContext mergedGenotypes = GenotypesContext.create(); for (final Genotype gt1 : vc1.getGenotypes()) { Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); List<Allele> site1Alleles = gt1.getAlleles(); List<Allele> site2Alleles = gt2.getAlleles(); List<Allele> mergedAllelesForSample = new LinkedList<Allele>(); /* NOTE: Since merged alleles are added to mergedAllelesForSample in the SAME order as in the input VC records, we preserve phase information (if any) relative to whatever precedes vc1: */ Iterator<Allele> all2It = site2Alleles.iterator(); for (Allele all1 : site1Alleles) { Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable() Allele mergedAllele = mergeData.ensureMergedAllele(all1, all2); mergedAllelesForSample.add(mergedAllele); } double mergedGQ = Math.max(gt1.getLog10PError(), gt2.getLog10PError()); Set<String> mergedGtFilters = new HashSet< String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered Map<String, Object> mergedGtAttribs = new HashMap<String, Object>(); PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2); if (phaseQual.PQ != null) mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ); Genotype mergedGt = new Genotype( gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased); mergedGenotypes.add(mergedGt); } String mergedName = mergeVariantContextNames(vc1.getSource(), vc2.getSource()); double mergedLog10PError = Math.min(vc1.getLog10PError(), vc2.getLog10PError()); Set<String> mergedFilters = new HashSet< String>(); // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered Map<String, Object> mergedAttribs = mergeVariantContextAttributes(vc1, vc2); // ids List<String> mergedIDs = new ArrayList<String>(); if (vc1.hasID()) mergedIDs.add(vc1.getID()); if (vc2.hasID()) mergedIDs.add(vc2.getID()); String mergedID = mergedIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs); VariantContextBuilder mergedBuilder = new VariantContextBuilder( mergedName, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles()) .id(mergedID) .genotypes(mergedGenotypes) .log10PError(mergedLog10PError) .filters(mergedFilters) .attributes(mergedAttribs); VariantContextUtils.calculateChromosomeCounts(mergedBuilder, true); return mergedBuilder.make(); }
/** * Compute full calls at a given locus. Entry point for engine calls from the UnifiedGenotyper. * * <p>If allSamples != null, then the output variantCallContext is guarenteed to contain a * genotype for every sample in allSamples. If it's null there's no such guarentee. Providing this * argument is critical when the resulting calls will be written to a VCF file. * * @param tracker the meta data tracker * @param refContext the reference base * @param rawContext contextual information around the locus * @param allSamples set of all sample names that we might call (i.e., those in the VCF header) * @return the VariantCallContext object */ public List<VariantCallContext> calculateLikelihoodsAndGenotypes( final RefMetaDataTracker tracker, final ReferenceContext refContext, final AlignmentContext rawContext, final Set<String> allSamples) { final List<VariantCallContext> results = new ArrayList<VariantCallContext>(2); final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext); final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap = new HashMap< String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap>(); if (models.isEmpty()) { results.add( UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null); } else { for (final GenotypeLikelihoodsCalculationModel.Model model : models) { perReadAlleleLikelihoodMap.clear(); final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model); if (stratifiedContexts == null) { results.add( UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE .GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null); } else { final VariantContext vc = calculateLikelihoods( tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap); if (vc != null) results.add( calculateGenotypes( tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap)); } } } return results; }