/** Initialize the stratifications, evaluations, evaluation contexts, and reporting object */ public void initialize() { // Just list the modules, and exit quickly. if (LIST) { variantEvalUtils.listModulesAndExit(); } // maintain the full list of comps comps.addAll(compsProvided); if (dbsnp.dbsnp.isBound()) { comps.add(dbsnp.dbsnp); knowns.add(dbsnp.dbsnp); } // Add a dummy comp track if none exists if (comps.size() == 0) comps.add( new RodBinding<VariantContext>(VariantContext.class, "none", "UNBOUND", "", new Tags())); // Set up set of additional knowns for (RodBinding<VariantContext> compRod : comps) { if (KNOWN_NAMES.contains(compRod.getName())) knowns.add(compRod); } // Now that we have all the rods categorized, determine the sample list from the eval rods. Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), evals); Set<String> vcfSamples = SampleUtils.getSampleList( vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); // Load the sample list, using an intermediate tree set to sort the samples final Set<String> allSampleNames = SampleUtils.getSamplesFromCommandLineInput(vcfSamples); sampleNamesForEvaluation.addAll( new TreeSet<String>( SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS))); isSubsettingSamples = !sampleNamesForEvaluation.containsAll(allSampleNames); if (Arrays.asList(STRATIFICATIONS_TO_USE).contains("Sample")) { sampleNamesForStratification.addAll(sampleNamesForEvaluation); } sampleNamesForStratification.add(ALL_SAMPLE_NAME); // Initialize select expressions for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) { SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp); jexlExpressions.add(sjexl); } // Initialize the set of stratifications and evaluations to use // The list of stratifiers and evaluators to use final List<VariantStratifier> stratificationObjects = variantEvalUtils.initializeStratificationObjects( NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); final Set<Class<? extends VariantEvaluator>> evaluationClasses = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); checkForIncompatibleEvaluatorsAndStratifiers(stratificationObjects, evaluationClasses); for (VariantStratifier vs : stratificationObjects) { if (vs.getName().equals("Filter")) byFilterIsEnabled = true; else if (vs.getName().equals("Sample")) perSampleIsEnabled = true; } if (intervalsFile != null) { boolean fail = true; for (final VariantStratifier vs : stratificationObjects) { if (vs.getClass().equals(IntervalStratification.class)) fail = false; } if (fail) throw new UserException.BadArgumentValue( "ST", "stratIntervals argument provided but -ST IntervalStratification not provided"); } // Initialize the evaluation contexts createStratificationStates(stratificationObjects, evaluationClasses); // Load ancestral alignments if (ancestralAlignmentsFile != null) { try { ancestralAlignments = new IndexedFastaSequenceFile(ancestralAlignmentsFile); } catch (FileNotFoundException e) { throw new ReviewedStingException( String.format( "The ancestral alignments file, '%s', could not be found", ancestralAlignmentsFile.getAbsolutePath())); } } // initialize CNVs if (knownCNVsFile != null) { knownCNVsByContig = createIntervalTreeByContig(knownCNVsFile); } }
public void initialize() { for (final Tranche t : Tranche.readTranches(TRANCHES_FILE)) { if (t.ts >= TS_FILTER_LEVEL) { tranches.add(t); } logger.info(String.format("Read tranche " + t)); } Collections.reverse( tranches); // this algorithm wants the tranches ordered from best (lowest truth sensitivity) // to worst (highest truth sensitivity) for (final RodBinding rod : input) { inputNames.add(rod.getName()); } if (IGNORE_INPUT_FILTERS != null) { ignoreInputFilterSet.addAll(Arrays.asList(IGNORE_INPUT_FILTERS)); } // setup the header fields final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames)); addVQSRStandardHeaderLines(hInfo); final TreeSet<String> samples = new TreeSet<String>(); samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)); if (tranches.size() >= 2) { for (int iii = 0; iii < tranches.size() - 1; iii++) { final Tranche t = tranches.get(iii); hInfo.add( new VCFFilterHeaderLine( t.name, String.format( "Truth sensitivity tranche level for " + t.model.toString() + " model at VQS Lod: " + t.minVQSLod + " <= x < " + tranches.get(iii + 1).minVQSLod))); } } if (tranches.size() >= 1) { hInfo.add( new VCFFilterHeaderLine( tranches.get(0).name + "+", String.format( "Truth sensitivity tranche level for " + tranches.get(0).model.toString() + " model at VQS Lod < " + tranches.get(0).minVQSLod))); } else { throw new UserException( "No tranches were found in the file or were above the truth sensitivity filter level " + TS_FILTER_LEVEL); } logger.info("Keeping all variants in tranche " + tranches.get(tranches.size() - 1)); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); }