@Test public void testCountsFromLocusTraversal() { final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); engine.setGenomeLocParser(genomeLocParser); final Collection<SAMReaderID> samFiles = new ArrayList<>(); final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags()); samFiles.add(readerID); final SAMDataSource dataSource = new SAMDataSource( samFiles, new ThreadAllocation(), null, genomeLocParser, false, SAMFileReader.ValidationStringency.STRICT, null, null, new ValidationExclusion(), new ArrayList<ReadFilter>(), new ArrayList<ReadTransformer>(), false, (byte) 30, false, true); engine.setReadsDataSource(dataSource); final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader()); final TraverseLociNano traverseLociNano = new TraverseLociNano(1); final DummyLocusWalker walker = new DummyLocusWalker(); traverseLociNano.initialize(engine, walker, null); for (final Shard shard : dataSource.createShardIteratorOverAllReads(new LocusShardBalancer())) { final WindowMaker windowMaker = new WindowMaker( shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples); for (WindowMaker.WindowMakerIterator window : windowMaker) { final LocusShardDataProvider dataProvider = new LocusShardDataProvider( shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()); traverseLociNano.traverse(walker, dataProvider, 0); dataProvider.close(); } windowMaker.close(); } // dataSource.close(); Assert.assertEquals( engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig); Assert.assertEquals( engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig); }
// --------------------------------------------------------------------------------------------------------- // // Public interface functions // // --------------------------------------------------------------------------------------------------------- @Requires({"toolkit != null", "UAC != null"}) public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { this( toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), VariantContextUtils.DEFAULT_PLOIDY); }
private void initializeVcfWriter() { final List<String> inputNames = Arrays.asList(validation.getName()); // setup the header fields Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); hInfo.add( new VCFFilterHeaderLine( "bootstrap", "This site used for genotype bootstrapping with ProduceBeagleInputWalker")); bootstrapVCFOutput.writeHeader( new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames))); }
/**
 * Prepares the walker before traversal.
 *
 * <p>Resolves the sample list from the variant track, prints the Beagle header row (the fixed
 * marker/allele columns followed by three columns per sample), initializes the bootstrap VCF
 * writer when one is configured, and reads the VQSR calibration curve when a calibration file is
 * configured.
 */
public void initialize() {
  samples =
      SampleUtils.getSampleListWithVCFHeader(
          getToolkit(), Arrays.asList(variantCollection.variants.getName()));

  // Header row: fixed columns first, then the sample name repeated three times per sample.
  beagleWriter.print("marker alleleA alleleB");
  for (final String sampleName : samples) {
    final String sampleColumns = String.format(" %s %s %s", sampleName, sampleName, sampleName);
    beagleWriter.print(sampleColumns);
  }
  beagleWriter.println();

  if (bootstrapVCFOutput != null) {
    initializeVcfWriter();
  }

  // Nothing more to do unless a calibration file was supplied.
  if (VQSRCalibrationFile == null) {
    return;
  }
  VQSRCalibrator = VQSRCalibrationCurve.readFromFile(VQSRCalibrationFile);
  logger.info("Read calibration curve");
  VQSRCalibrator.printInfo(logger);
}
/** Initialize the stratifications, evaluations, evaluation contexts, and reporting object */ public void initialize() { // Just list the modules, and exit quickly. if (LIST) { variantEvalUtils.listModulesAndExit(); } // maintain the full list of comps comps.addAll(compsProvided); if (dbsnp.dbsnp.isBound()) { comps.add(dbsnp.dbsnp); knowns.add(dbsnp.dbsnp); } // Add a dummy comp track if none exists if (comps.size() == 0) comps.add( new RodBinding<VariantContext>(VariantContext.class, "none", "UNBOUND", "", new Tags())); // Set up set of additional knowns for (RodBinding<VariantContext> compRod : comps) { if (KNOWN_NAMES.contains(compRod.getName())) knowns.add(compRod); } // Now that we have all the rods categorized, determine the sample list from the eval rods. Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), evals); Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); // Load the sample list sampleNamesForEvaluation.addAll( SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS)); numSamples = NUM_SAMPLES > 0 ? NUM_SAMPLES : sampleNamesForEvaluation.size(); if (Arrays.asList(STRATIFICATIONS_TO_USE).contains("Sample")) { sampleNamesForStratification.addAll(sampleNamesForEvaluation); } sampleNamesForStratification.add(ALL_SAMPLE_NAME); // Initialize select expressions for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) { SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp); jexlExpressions.add(sjexl); } // Initialize the set of stratifications and evaluations to use stratificationObjects = variantEvalUtils.initializeStratificationObjects( this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); Set<Class<? 
extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); for (VariantStratifier vs : getStratificationObjects()) { if (vs.getName().equals("Filter")) byFilterIsEnabled = true; else if (vs.getName().equals("Sample")) perSampleIsEnabled = true; } if (intervalsFile != null) { boolean fail = true; for (final VariantStratifier vs : stratificationObjects) { if (vs.getClass().equals(IntervalStratification.class)) fail = false; } if (fail) throw new UserException.BadArgumentValue( "ST", "stratIntervals argument provided but -ST IntervalStratification not provided"); } // Initialize the evaluation contexts evaluationContexts = variantEvalUtils.initializeEvaluationContexts( stratificationObjects, evaluationObjects, null, null); // Initialize report table report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationObjects); // Load ancestral alignments if (ancestralAlignmentsFile != null) { try { ancestralAlignments = new IndexedFastaSequenceFile(ancestralAlignmentsFile); } catch (FileNotFoundException e) { throw new ReviewedStingException( String.format( "The ancestral alignments file, '%s', could not be found", ancestralAlignmentsFile.getAbsolutePath())); } } // initialize CNVs if (knownCNVsFile != null) { knownCNVsByContig = createIntervalTreeByContig(knownCNVsFile); } }
/** Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher */ public void initialize() { // Get list of samples to include in the output List<String> rodNames = Arrays.asList(variantCollection.variants.getName()); Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); TreeSet<String> vcfSamples = new TreeSet<String>( SampleUtils.getSampleList( vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles); Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions); // first, add any requested samples samples.addAll(samplesFromFile); samples.addAll(samplesFromExpressions); samples.addAll(sampleNames); // if none were requested, we want all of them if (samples.isEmpty()) { samples.addAll(vcfSamples); NO_SAMPLES_SPECIFIED = true; } // now, exclude any requested samples Collection<String> XLsamplesFromFile = SampleUtils.getSamplesFromFiles(XLsampleFiles); samples.removeAll(XLsamplesFromFile); samples.removeAll(XLsampleNames); if (samples.size() == 0 && !NO_SAMPLES_SPECIFIED) throw new UserException( "All samples requested to be included were also requested to be excluded."); for (String sample : samples) logger.info("Including sample '" + sample + "'"); // if user specified types to include, add these, otherwise, add all possible variant context // types to list of vc types to include if (TYPES_TO_INCLUDE.isEmpty()) { for (VariantContext.Type t : VariantContext.Type.values()) selectedTypes.add(t); } else { for (VariantContext.Type t : TYPES_TO_INCLUDE) selectedTypes.add(t); } // Initialize VCF header Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger); headerLines.add(new VCFHeaderLine("source", "SelectVariants")); if (KEEP_ORIGINAL_CHR_COUNTS) { headerLines.add( new VCFFormatHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC")); 
headerLines.add( new VCFFormatHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF")); headerLines.add( new VCFFormatHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN")); } vcfWriter.writeHeader(new VCFHeader(headerLines, samples)); for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) { // It's not necessary that the user supply select names for the JEXL expressions, since those // expressions will only be needed for omitting records. Make up the select names here. selectNames.add(String.format("select-%d", i)); } jexls = VariantContextUtils.initializeMatchExps(selectNames, SELECT_EXPRESSIONS); // Look at the parameters to decide which analysis to perform DISCORDANCE_ONLY = discordanceTrack.isBound(); if (DISCORDANCE_ONLY) logger.info( "Selecting only variants discordant with the track: " + discordanceTrack.getName()); CONCORDANCE_ONLY = concordanceTrack.isBound(); if (CONCORDANCE_ONLY) logger.info( "Selecting only variants concordant with the track: " + concordanceTrack.getName()); if (MENDELIAN_VIOLATIONS) { if (FAMILY_STRUCTURE_FILE != null) { try { for (final String line : new XReadLines(FAMILY_STRUCTURE_FILE)) { MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD); if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom())) mvSet.add(mv); } } catch (FileNotFoundException e) { throw new UserException.CouldNotReadInputFile(FAMILY_STRUCTURE_FILE, e); } if (outMVFile != null) try { outMVFileStream = new PrintStream(outMVFile); } catch (FileNotFoundException e) { throw new UserException.CouldNotCreateOutputFile( outMVFile, "Can't open output file", e); } } else mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); } else if (!FAMILY_STRUCTURE.isEmpty()) { mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); MENDELIAN_VIOLATIONS = true; } SELECT_RANDOM_NUMBER = numRandom > 0; 
if (SELECT_RANDOM_NUMBER) { logger.info("Selecting " + numRandom + " variants at random from the variant track"); variantArray = new RandomVariantStructure[numRandom]; } SELECT_RANDOM_FRACTION = fractionRandom > 0; if (SELECT_RANDOM_FRACTION) logger.info( "Selecting approximately " + 100.0 * fractionRandom + "% of the variants at random from the variant track"); }
public void initialize() { for (final Tranche t : Tranche.readTranches(TRANCHES_FILE)) { if (t.ts >= TS_FILTER_LEVEL) { tranches.add(t); } logger.info(String.format("Read tranche " + t)); } Collections.reverse( tranches); // this algorithm wants the tranches ordered from best (lowest truth sensitivity) // to worst (highest truth sensitivity) for (final RodBinding rod : input) { inputNames.add(rod.getName()); } if (IGNORE_INPUT_FILTERS != null) { ignoreInputFilterSet.addAll(Arrays.asList(IGNORE_INPUT_FILTERS)); } // setup the header fields final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames)); addVQSRStandardHeaderLines(hInfo); final TreeSet<String> samples = new TreeSet<String>(); samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)); if (tranches.size() >= 2) { for (int iii = 0; iii < tranches.size() - 1; iii++) { final Tranche t = tranches.get(iii); hInfo.add( new VCFFilterHeaderLine( t.name, String.format( "Truth sensitivity tranche level for " + t.model.toString() + " model at VQS Lod: " + t.minVQSLod + " <= x < " + tranches.get(iii + 1).minVQSLod))); } } if (tranches.size() >= 1) { hInfo.add( new VCFFilterHeaderLine( tranches.get(0).name + "+", String.format( "Truth sensitivity tranche level for " + tranches.get(0).model.toString() + " model at VQS Lod < " + tranches.get(0).minVQSLod))); } else { throw new UserException( "No tranches were found in the file or were above the truth sensitivity filter level " + TS_FILTER_LEVEL); } logger.info("Keeping all variants in tranche " + tranches.get(tranches.size() - 1)); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); }