/**
 * Runs {@code AddContextDataToRead.add} over the supplied reads with a mocked
 * variants source and a mocked reference source, then checks the resulting
 * read/context pairs against {@code kvReadContextData}.
 *
 * <p>{@code kvReadRefBases} and {@code kvReadiVariant} are delivered by the
 * "bases" data provider but are not consulted by this test.
 *
 * @param reads             input reads fed into the pipeline
 * @param variantList       variants returned by the mocked variants source
 * @param kvReadRefBases    unused here
 * @param kvReadContextData expected output pairs
 * @param intervals         intervals for which the mocked reference source is stubbed
 * @param kvReadiVariant    unused here
 */
@Test(dataProvider = "bases")
public void fullTest(
        List<GATKRead> reads,
        List<Variant> variantList,
        List<KV<GATKRead, ReferenceBases>> kvReadRefBases,
        List<KV<GATKRead, ReadContextData>> kvReadContextData,
        List<SimpleInterval> intervals,
        List<KV<GATKRead, Iterable<Variant>>> kvReadiVariant) {
    // Reference metadata: a single reference name mapped to a made-up id.
    final String referenceName = "refName";
    final String refId = "0xbjfjd23f";
    final Map<String, String> nameToId = Maps.newHashMap();
    nameToId.put(referenceName, refId);
    final RefAPIMetadata refAPIMetadata = new RefAPIMetadata(referenceName, nameToId);

    final Pipeline pipeline = GATKTestPipeline.create();
    DataflowUtils.registerGATKCoders(pipeline);

    // Inputs: the reads collection and the variants served by the mocked source.
    final PCollection<GATKRead> readsCollection =
            DataflowTestUtils.pCollectionCreateAndVerify(pipeline, reads, new GATKReadCoder());
    final PCollection<Variant> variantsCollection = pipeline.apply(Create.of(variantList));

    final VariantsDataflowSource variantsSource = mock(VariantsDataflowSource.class);
    when(variantsSource.getAllVariants()).thenReturn(variantsCollection);

    // Serializable reference mock that answers with fake bases for every known interval.
    final RefAPISource referenceSource = mock(RefAPISource.class, withSettings().serializable());
    for (final SimpleInterval interval : intervals) {
        when(referenceSource.getReferenceBases(
                any(PipelineOptions.class), any(RefAPIMetadata.class), eq(interval)))
                .thenReturn(FakeReferenceSource.bases(interval));
    }

    // The mock is installed through the static setter rather than passed to add().
    RefAPISource.setRefAPISource(referenceSource);

    final PCollection<KV<GATKRead, ReadContextData>> actual =
            AddContextDataToRead.add(readsCollection, refAPIMetadata, variantsSource);

    final KvCoder<GATKRead, ReadContextData> expectedCoder =
            KvCoder.of(new GATKReadCoder(), new ReadContextDataCoder());
    final PCollection<KV<GATKRead, ReadContextData>> expected =
            pipeline.apply(Create.of(kvReadContextData).withCoder(expectedCoder));

    DataflowTestUtils.keyReadContextDataMatcher(actual, expected);
    pipeline.run();
}
@Override protected void setupPipeline(Pipeline pipeline) { // Load the reads. final ReadsDataflowSource readsDataflowSource = new ReadsDataflowSource(bam, pipeline); final SAMFileHeader readsHeader = readsDataflowSource.getHeader(); final List<SimpleInterval> intervals = intervalArgumentCollection.intervalsSpecified() ? intervalArgumentCollection.getIntervals(readsHeader.getSequenceDictionary()) : IntervalUtils.getAllIntervalsForReference(readsHeader.getSequenceDictionary()); final PCollectionView<SAMFileHeader> headerSingleton = ReadsDataflowSource.getHeaderView(pipeline, readsHeader); final PCollection<GATKRead> initialReads = readsDataflowSource.getReadPCollection(intervals); // Apply MarkDuplicates to produce updated GATKReads. final PCollection<GATKRead> markedReads = initialReads.apply(new MarkDuplicates(headerSingleton)); // Load the Variants and the Reference and join them to reads. final VariantsDataflowSource variantsDataflowSource = new VariantsDataflowSource(baseRecalibrationKnownVariants, pipeline); Map<String, String> referenceNameToIdTable = RefAPISource.buildReferenceNameToIdTable(pipeline.getOptions(), referenceName); RefAPIMetadata refAPIMetadata = new RefAPIMetadata(referenceName, referenceNameToIdTable); final PCollection<KV<GATKRead, ReadContextData>> readsWithContext = AddContextDataToRead.add(markedReads, refAPIMetadata, variantsDataflowSource); // Apply BQSR. final PCollection<RecalibrationTables> recalibrationReports = readsWithContext.apply(new BaseRecalibratorStub(headerSingleton)); final PCollectionView<RecalibrationTables> mergedRecalibrationReport = recalibrationReports.apply(View.<RecalibrationTables>asSingleton()); final PCollection<GATKRead> finalReads = markedReads.apply(new ApplyBQSRStub(headerSingleton, mergedRecalibrationReport)); SmallBamWriter.writeToFile(pipeline, finalReads, readsHeader, output); }