@Override protected void setupPipeline(Pipeline pipeline) { // Load the reads. final ReadsDataflowSource readsDataflowSource = new ReadsDataflowSource(bam, pipeline); final SAMFileHeader readsHeader = readsDataflowSource.getHeader(); final List<SimpleInterval> intervals = intervalArgumentCollection.intervalsSpecified() ? intervalArgumentCollection.getIntervals(readsHeader.getSequenceDictionary()) : IntervalUtils.getAllIntervalsForReference(readsHeader.getSequenceDictionary()); final PCollectionView<SAMFileHeader> headerSingleton = ReadsDataflowSource.getHeaderView(pipeline, readsHeader); final PCollection<GATKRead> initialReads = readsDataflowSource.getReadPCollection(intervals); // Apply MarkDuplicates to produce updated GATKReads. final PCollection<GATKRead> markedReads = initialReads.apply(new MarkDuplicates(headerSingleton)); // Load the Variants and the Reference and join them to reads. final VariantsDataflowSource variantsDataflowSource = new VariantsDataflowSource(baseRecalibrationKnownVariants, pipeline); Map<String, String> referenceNameToIdTable = RefAPISource.buildReferenceNameToIdTable(pipeline.getOptions(), referenceName); RefAPIMetadata refAPIMetadata = new RefAPIMetadata(referenceName, referenceNameToIdTable); final PCollection<KV<GATKRead, ReadContextData>> readsWithContext = AddContextDataToRead.add(markedReads, refAPIMetadata, variantsDataflowSource); // Apply BQSR. final PCollection<RecalibrationTables> recalibrationReports = readsWithContext.apply(new BaseRecalibratorStub(headerSingleton)); final PCollectionView<RecalibrationTables> mergedRecalibrationReport = recalibrationReports.apply(View.<RecalibrationTables>asSingleton()); final PCollection<GATKRead> finalReads = markedReads.apply(new ApplyBQSRStub(headerSingleton, mergedRecalibrationReport)); SmallBamWriter.writeToFile(pipeline, finalReads, readsHeader, output); }