Esempio n. 1
0
 /**
  * Convenience overload: forwards every argument unchanged to the five-argument
  * {@code writeToFile} and supplies {@code false} for its trailing boolean.
  *
  * @param pipeline the pipeline, passed through as-is
  * @param reads the collection of reads, passed through as-is
  * @param header the SAM file header, passed through as-is
  * @param destPath the destination path, passed through as-is
  */
 public static void writeToFile(
     Pipeline pipeline, PCollection<GATKRead> reads,
     final SAMFileHeader header, final String destPath) {
   // NOTE(review): the meaning of the fifth argument is defined by the
   // five-argument overload, which is not visible here; confirm before renaming.
   final boolean trailingFlag = false;
   writeToFile(pipeline, reads, header, destPath, trailingFlag);
 }
  /**
   * Wires the processing stages onto the supplied pipeline, in this order:
   * <ol>
   *   <li>open the reads source for {@code bam} and fetch its header;</li>
   *   <li>pick the intervals to read: the ones supplied through
   *       {@code intervalArgumentCollection} when any were specified, otherwise
   *       whatever {@code IntervalUtils.getAllIntervalsForReference} returns for
   *       the header's sequence dictionary;</li>
   *   <li>apply {@code MarkDuplicates} to the reads;</li>
   *   <li>attach context data to the marked reads via
   *       {@code AddContextDataToRead.add}, using the known variants and the
   *       reference metadata;</li>
   *   <li>apply {@code BaseRecalibratorStub} to the reads-with-context and expose
   *       the result as a singleton view;</li>
   *   <li>apply {@code ApplyBQSRStub} to the marked reads (not to the
   *       reads-with-context collection) and hand the result to
   *       {@code SmallBamWriter.writeToFile} with {@code output} as destination.</li>
   * </ol>
   *
   * @param pipeline the pipeline the stages are attached to
   */
  @Override
  protected void setupPipeline(Pipeline pipeline) {
    // Reads source and its header; the header drives interval selection below.
    final ReadsDataflowSource readSource = new ReadsDataflowSource(bam, pipeline);
    final SAMFileHeader header = readSource.getHeader();

    // Explicitly specified intervals win; otherwise fall back to the
    // intervals derived from the header's sequence dictionary.
    final List<SimpleInterval> targetIntervals;
    if (intervalArgumentCollection.intervalsSpecified()) {
      targetIntervals = intervalArgumentCollection.getIntervals(header.getSequenceDictionary());
    } else {
      targetIntervals = IntervalUtils.getAllIntervalsForReference(header.getSequenceDictionary());
    }

    // The header view is shared by every transform constructed below.
    final PCollectionView<SAMFileHeader> headerView =
        ReadsDataflowSource.getHeaderView(pipeline, header);
    final PCollection<GATKRead> rawReads = readSource.getReadPCollection(targetIntervals);

    // Stage: MarkDuplicates over the freshly loaded reads.
    final MarkDuplicates markDuplicates = new MarkDuplicates(headerView);
    final PCollection<GATKRead> dedupedReads = rawReads.apply(markDuplicates);

    // Stage: known variants plus reference metadata, joined onto the reads.
    final VariantsDataflowSource knownVariants =
        new VariantsDataflowSource(baseRecalibrationKnownVariants, pipeline);

    final Map<String, String> refNameToId =
        RefAPISource.buildReferenceNameToIdTable(pipeline.getOptions(), referenceName);
    final RefAPIMetadata refMetadata = new RefAPIMetadata(referenceName, refNameToId);

    final PCollection<KV<GATKRead, ReadContextData>> contextualReads =
        AddContextDataToRead.add(dedupedReads, refMetadata, knownVariants);

    // Stage: recalibration tables, exposed as a singleton view.
    final BaseRecalibratorStub recalibrator = new BaseRecalibratorStub(headerView);
    final PCollection<RecalibrationTables> recalTables = contextualReads.apply(recalibrator);
    final PCollectionView<RecalibrationTables> recalView =
        recalTables.apply(View.<RecalibrationTables>asSingleton());

    // Stage: apply the recalibration to the marked reads and write them out.
    final ApplyBQSRStub applyBqsr = new ApplyBQSRStub(headerView, recalView);
    final PCollection<GATKRead> recalibratedReads = dedupedReads.apply(applyBqsr);
    SmallBamWriter.writeToFile(pipeline, recalibratedReads, header, output);
  }