private VariantContext findMatchingComp( final VariantContext eval, final Collection<VariantContext> comps) { // if no comps, return null if (comps == null || comps.isEmpty()) return null; // if no eval, return any comp if (eval == null) return comps.iterator().next(); // find all of the matching comps VariantContext lenientMatch = null; for (final VariantContext comp : comps) { switch (doEvalAndCompMatch(comp, eval, requireStrictAlleleMatch)) { case STRICT: return comp; case LENIENT: if (lenientMatch == null) lenientMatch = comp; break; case NO_MATCH: // do nothing } } // nothing matched, just return lenientMatch, which might be null return lenientMatch; }
/** * this is the function that the inheriting class can expect to have called when the command line * system has initialized. * * @return the return code to exit the program with */ protected int execute() throws Exception { engine.setParser(parser); argumentSources.add(this); Walker<?, ?> walker = engine.getWalkerByName(getAnalysisName()); try { // Make sure a valid GATK user key is present, if required. authorizeGATKRun(); engine.setArguments(getArgumentCollection()); // File lists can require a bit of additional expansion. Set these explicitly by the engine. engine.setSAMFileIDs( ListFileUtils.unpackBAMFileList(getArgumentCollection().samFiles, parser)); engine.setWalker(walker); walker.setToolkit(engine); Collection<ReadFilter> filters = engine.createFilters(); engine.setFilters(filters); // load the arguments into the walker / filters. // TODO: The fact that this extra load call exists here when all the parsing happens at the // engine // TODO: level indicates that we're doing something wrong. Turn this around so that the GATK // can drive // TODO: argument processing. loadArgumentsIntoObject(walker); argumentSources.add(walker); Collection<RMDTriplet> rodBindings = ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser); engine.setReferenceMetaDataFiles(rodBindings); for (ReadFilter filter : filters) { loadArgumentsIntoObject(filter); argumentSources.add(filter); } engine.execute(); generateGATKRunReport(walker); } catch (Exception e) { generateGATKRunReport(walker, e); throw e; } // always return 0 return 0; }
private boolean isConcordant(VariantContext vc, Collection<VariantContext> compVCs) { if (vc == null || compVCs == null || compVCs.isEmpty()) return false; // if we're not looking for specific samples then the fact that we have both VCs is enough to // call it concordant. if (NO_SAMPLES_SPECIFIED) return true; // make a list of all samples contained in this variant VC that are being tracked by the user // command line arguments. Set<String> variantSamples = vc.getSampleNames(); variantSamples.retainAll(samples); // check if we can find all samples from the variant rod in the comp rod. for (String sample : variantSamples) { boolean foundSample = false; for (VariantContext compVC : compVCs) { Genotype varG = vc.getGenotype(sample); Genotype compG = compVC.getGenotype(sample); if (haveSameGenotypes(varG, compG)) { foundSample = true; break; } } // if at least one sample doesn't have the same genotype, we don't have concordance if (!foundSample) { return false; } } return true; }
/** * Checks if vc has a variant call for (at least one of) the samples. * * @param vc the variant rod VariantContext. Here, the variant is the dataset you're looking for * discordances to (e.g. HapMap) * @param compVCs the comparison VariantContext (discordance * @return */ private boolean isDiscordant(VariantContext vc, Collection<VariantContext> compVCs) { if (vc == null) return false; // if we're not looking at specific samples then the absence of a compVC means discordance if (NO_SAMPLES_SPECIFIED) return (compVCs == null || compVCs.isEmpty()); // check if we find it in the variant rod Map<String, Genotype> genotypes = vc.getGenotypes(samples); for (Genotype g : genotypes.values()) { if (sampleHasVariant(g)) { // There is a variant called (or filtered with not exclude filtered option set) that is not // HomRef for at least one of the samples. if (compVCs == null) return true; // Look for this sample in the all vcs of the comp ROD track. boolean foundVariant = false; for (VariantContext compVC : compVCs) { if (sampleHasVariant(compVC.getGenotype(g.getSampleName()))) { foundVariant = true; break; } } // if (at least one sample) was not found in all VCs of the comp ROD, we have discordance if (!foundVariant) return true; } } return false; // we only get here if all samples have a variant in the comp rod. }
/** * GATK provides the walker as an argument source. * * @return List of walkers to load dynamically. */ @Override protected Class[] getArgumentSources() { // No walker info? No plugins. if (getAnalysisName() == null) return new Class[] {}; Collection<Class> argumentSources = new ArrayList<Class>(); Walker walker = engine.getWalkerByName(getAnalysisName()); engine.setArguments(getArgumentCollection()); engine.setWalker(walker); walker.setToolkit(engine); argumentSources.add(walker.getClass()); Collection<ReadFilter> filters = engine.createFilters(); for (ReadFilter filter : filters) argumentSources.add(filter.getClass()); Class[] argumentSourcesAsArray = new Class[argumentSources.size()]; return argumentSources.toArray(argumentSourcesAsArray); }
/** Collect relevant information from each variant in the supplied VCFs */ @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { for (NewEvaluationContext nec : evaluationContexts.values()) { synchronized (nec) { nec.update0(tracker, ref, context); } } if (tracker != null) { String aastr = (ancestralAlignments == null) ? null : new String( ancestralAlignments .getSubsequenceAt( ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop()) .getBases()); // --------- track --------- sample - VariantContexts - HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> evalVCs = variantEvalUtils.bindVariantContexts( tracker, ref, evals, byFilterIsEnabled, true, perSampleIsEnabled, mergeEvals); HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> compVCs = variantEvalUtils.bindVariantContexts( tracker, ref, comps, byFilterIsEnabled, false, false, false); // for each eval track for (final RodBinding<VariantContext> evalRod : evals) { final Map<String, Collection<VariantContext>> emptyEvalMap = Collections.emptyMap(); final Map<String, Collection<VariantContext>> evalSet = evalVCs.containsKey(evalRod) ? evalVCs.get(evalRod) : emptyEvalMap; // for each sample stratifier for (final String sampleName : sampleNamesForStratification) { Collection<VariantContext> evalSetBySample = evalSet.get(sampleName); if (evalSetBySample == null) { evalSetBySample = new HashSet<VariantContext>(1); evalSetBySample.add(null); } // for each eval in the track for (VariantContext eval : evalSetBySample) { // deal with ancestral alleles if requested if (eval != null && aastr != null) { eval = new VariantContextBuilder(eval).attribute("ANCESTRALALLELE", aastr).make(); } // for each comp track for (final RodBinding<VariantContext> compRod : comps) { // no sample stratification for comps final HashMap<String, Collection<VariantContext>> compSetHash = compVCs.get(compRod); final Collection<VariantContext> compSet = (compSetHash == null || compSetHash.size() == 0) ? Collections.<VariantContext>emptyList() : compVCs.get(compRod).values().iterator().next(); // find the comp final VariantContext comp = findMatchingComp(eval, compSet); HashMap<VariantStratifier, List<String>> stateMap = new HashMap<VariantStratifier, List<String>>(); for (VariantStratifier vs : stratificationObjects) { List<String> states = vs.getRelevantStates( ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName); stateMap.put(vs, states); } ArrayList<StateKey> stateKeys = new ArrayList<StateKey>(); variantEvalUtils.initializeStateKeys(stateMap, null, null, stateKeys); HashSet<StateKey> stateKeysHash = new HashSet<StateKey>(stateKeys); for (StateKey stateKey : stateKeysHash) { NewEvaluationContext nec = evaluationContexts.get(stateKey); // eval against the comp synchronized (nec) { nec.apply(tracker, ref, context, comp, eval); } // eval=null against all comps of different type that aren't bound to another eval for (VariantContext otherComp : compSet) { if (otherComp != comp && !compHasMatchingEval(otherComp, evalSetBySample)) { synchronized (nec) { nec.apply(tracker, ref, context, otherComp, null); } } } } } } } if (mergeEvals) break; // stop processing the eval tracks } } return null; }
/** * Subset VC record if necessary and emit the modified record (provided it satisfies criteria for * printing) * * @param tracker the ROD tracker * @param ref reference information * @param context alignment info * @return 1 if the record was printed to the output file, 0 if otherwise */ @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker == null) return 0; Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation()); if (vcs == null || vcs.size() == 0) { return 0; } for (VariantContext vc : vcs) { if (MENDELIAN_VIOLATIONS) { boolean foundMV = false; for (MendelianViolation mv : mvSet) { if (mv.isViolation(vc)) { foundMV = true; // System.out.println(vc.toString()); if (outMVFile != null) outMVFileStream.format( "MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + "childG=%s childGL=%s\n", vc.getChr(), vc.getStart(), vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getChromosomeCount(vc.getAlternateAllele(0)), mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(), vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), vc.getGenotype(mv.getSampleChild()).toBriefString(), vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString()); } } if (!foundMV) break; } if (DISCORDANCE_ONLY) { Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation()); if (!isDiscordant(vc, compVCs)) return 0; } if (CONCORDANCE_ONLY) { Collection<VariantContext> compVCs = tracker.getValues(concordanceTrack, context.getLocation()); if (!isConcordant(vc, compVCs)) return 0; } if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic()) continue; if (alleleRestriction.equals(NumberAlleleRestriction.MULTIALLELIC) && vc.isBiallelic()) continue; if (!selectedTypes.contains(vc.getType())) continue; VariantContext sub = subsetRecord(vc, samples); if ((sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED)) { for (VariantContextUtils.JexlVCMatchExp jexl : jexls) { if (!VariantContextUtils.match(sub, jexl)) { return 0; } } if (SELECT_RANDOM_NUMBER) { randomlyAddVariant(++variantNumber, sub, ref.getBase()); } else if (!SELECT_RANDOM_FRACTION || (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { vcfWriter.add(sub); } } } return 1; }
/** Collect relevant information from each variant in the supplied VCFs */ @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { // we track the processed bp and expose this for modules instead of wasting CPU power on // calculating // the same thing over and over in evals that want the processed bp synchronized (this) { nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); } if (tracker != null) { String aastr = (ancestralAlignments == null) ? null : new String( ancestralAlignments .getSubsequenceAt( ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop()) .getBases()); // // update the dynamic stratifications // for (final VariantContext vc : tracker.getValues(evals, ref.getLocus())) { // // don't worry -- DynamicStratification only work with one eval object // for ( final DynamicStratification ds : dynamicStratifications ) { // ds.update(vc); // } // } // --------- track --------- sample - VariantContexts - HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> evalVCs = variantEvalUtils.bindVariantContexts( tracker, ref, evals, byFilterIsEnabled, true, perSampleIsEnabled, mergeEvals); HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> compVCs = variantEvalUtils.bindVariantContexts( tracker, ref, comps, byFilterIsEnabled, false, false, false); // for each eval track for (final RodBinding<VariantContext> evalRod : evals) { final Map<String, Collection<VariantContext>> emptyEvalMap = Collections.emptyMap(); final Map<String, Collection<VariantContext>> evalSet = evalVCs.containsKey(evalRod) ? evalVCs.get(evalRod) : emptyEvalMap; // for each sample stratifier for (final String sampleName : sampleNamesForStratification) { Collection<VariantContext> evalSetBySample = evalSet.get(sampleName); if (evalSetBySample == null) { evalSetBySample = new HashSet<VariantContext>(1); evalSetBySample.add(null); } // for each eval in the track for (VariantContext eval : evalSetBySample) { // deal with ancestral alleles if requested if (eval != null && aastr != null) { eval = new VariantContextBuilder(eval).attribute("ANCESTRALALLELE", aastr).make(); } // for each comp track for (final RodBinding<VariantContext> compRod : comps) { // no sample stratification for comps final HashMap<String, Collection<VariantContext>> compSetHash = compVCs.get(compRod); final Collection<VariantContext> compSet = (compSetHash == null || compSetHash.size() == 0) ? Collections.<VariantContext>emptyList() : compVCs.get(compRod).values().iterator().next(); // find the comp final VariantContext comp = findMatchingComp(eval, compSet); for (EvaluationContext nec : getEvaluationContexts( tracker, ref, eval, evalRod.getName(), comp, compRod.getName(), sampleName)) { // eval against the comp synchronized (nec) { nec.apply(tracker, ref, context, comp, eval); } // eval=null against all comps of different type that aren't bound to another eval for (VariantContext otherComp : compSet) { if (otherComp != comp && !compHasMatchingEval(otherComp, evalSetBySample)) { synchronized (nec) { nec.apply(tracker, ref, context, otherComp, null); } } } } } } } if (mergeEvals) break; // stop processing the eval tracks } } return null; }