Example #1
 @Override
 public void processElement(ProcessContext c) throws Exception {
   // The input element carries the destination path; the reads arrive via the side-input view.
   String dest = c.element();
   logger.info("Saving to " + dest);
   Iterable<GATKRead> reads = c.sideInput(iterableView);
   // Open the destination (GCS or local) and stream every read out as a SAMRecord.
   // presorted=false, so the writer sorts records according to the header's sort order.
   OutputStream outputStream = BucketUtils.createFile(dest, c.getPipelineOptions());
   try (SAMFileWriter writer =
       new SAMFileWriterFactory().makeBAMWriter(header, false, outputStream)) {
     for (GATKRead r : reads) {
       final SAMRecord sr = r.convertToSAMRecord(header);
       writer.addAlignment(sr);
     }
   }
 }
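
For context, here is a minimal sketch of the enclosing DoFn class that this processElement would live in. It assumes the pre-Beam Google Cloud Dataflow SDK (com.google.cloud.dataflow.sdk), htsjdk, and the GATK4 GATKRead type; the constructor signature mirrors the new SaveToBAMFile(header, iterableView) call in Example #2, while the field names and logging choice are assumptions rather than the actual GATK source.

import java.io.OutputStream;
import java.util.logging.Logger;

import com.google.cloud.dataflow.sdk.transforms.DoFn;
import com.google.cloud.dataflow.sdk.values.PCollectionView;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;

import org.broadinstitute.hellbender.utils.read.GATKRead;

// Sketch only: the real GATK class may differ in package, fields, and logging.
class SaveToBAMFile extends DoFn<String, Void> {
  // Assumed logger; GATK itself may use a different logging framework.
  private static final Logger logger = Logger.getLogger(SaveToBAMFile.class.getName());

  private final SAMFileHeader header;
  private final PCollectionView<Iterable<GATKRead>> iterableView;

  SaveToBAMFile(SAMFileHeader header, PCollectionView<Iterable<GATKRead>> iterableView) {
    this.header = header;
    this.iterableView = iterableView;
  }

  // The processElement method from Example #1 goes here.
}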
Example #2
  /**
   * Takes a collection of reads and writes them to a BAM file. The reads don't have to be sorted
   * initially; the output BAM file will be. All the reads must fit into a single worker's memory,
   * so this won't work well if there are too many.
   *
   * @param pipeline the pipeline to add this operation to.
   * @param reads the reads to write (they don't need to be sorted).
   * @param header the header that corresponds to the reads.
   * @param destPath the GCS or local path to write to (must start with "gs://" if writing to GCS).
   * @param parquet whether to write out BAM or Parquet data (BDG AlignmentRecords); only applies
   *     when writing to Hadoop.
   */
  public static void writeToFile(
      Pipeline pipeline,
      PCollection<GATKRead> reads,
      final SAMFileHeader header,
      final String destPath,
      final boolean parquet) {
    if (BucketUtils.isHadoopUrl(destPath)
        || pipeline.getRunner().getClass().equals(SparkPipelineRunner.class)) {
      writeToHadoop(pipeline, reads, header, destPath, parquet);
    } else {
      PCollectionView<Iterable<GATKRead>> iterableView = reads.apply(View.<GATKRead>asIterable());

      PCollection<String> dummy = pipeline.apply("output file name", Create.<String>of(destPath));

      dummy.apply(
          ParDo.named("save to BAM file")
              .withSideInputs(iterableView)
              .of(new SaveToBAMFile(header, iterableView)));
    }
  }
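
A hypothetical call site is sketched below. It assumes a Dataflow Pipeline and a PCollection<GATKRead> named reads and a SAMFileHeader named header already exist; the bucket path is illustrative only. Because the path is not a Hadoop URL and the runner is not SparkPipelineRunner, this would take the side-input branch above: the whole PCollection is materialized as a side input and a single worker writes one sorted BAM file.

// Hypothetical usage: pipeline, reads, and header are assumed to exist already,
// and the gs:// path is illustrative.
writeToFile(pipeline, reads, header, "gs://my-bucket/output/reads.bam", false);
pipeline.run();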