/**
   * Construct an artificial SAM file reader with the given SAM file header.
   *
   * <p>The superclass is initialized with the stream returned by {@code createEmptyInputStream()};
   * the header and reads supplied here are stored on this instance.
   *
   * @param customHeader Header that should be returned by calls to getFileHeader() on this reader.
   *     Its sequence dictionary is also used to build this reader's {@code GenomeLocParser}, so it
   *     must not be {@code null}.
   * @param reads Reads to use as backing data source. They are stored via {@code Arrays.asList},
   *     i.e. as a fixed-size list view of the supplied array rather than a copy.
   */
  public ArtificialSAMFileReader(SAMFileHeader customHeader, SAMRecord... reads) {
    // NOTE(review): the meaning of the boolean flag is defined by the superclass constructor,
    // which is not visible here -- confirm before changing it.
    super(createEmptyInputStream(), true);

    this.customHeader = customHeader;
    this.genomeLocParser = new GenomeLocParser(customHeader.getSequenceDictionary());
    this.reads = Arrays.asList(reads);
  }
// Example no. 2
// 0
 /**
  * Returns the names of the sequences in the file header's sequence dictionary, in the order
  * in which the dictionary's {@code getSequences()} returns them.
  *
  * <p>The list is built on the first successful call and cached in {@code sequenceNames};
  * later calls return the cached list.
  *
  * @return the cached list of sequence names (empty if the dictionary has no sequences), or
  *     {@code null} if {@code getFileHeader()} returns {@code null}
  */
 public List<String> getSequenceNames() {
   if (sequenceNames == null) {
     final SAMFileHeader header = getFileHeader();
     if (header == null) {
       // Nothing is cached here, so a later call will retry the header lookup.
       return null;
     }
     // Look the sequences up once and reuse the result for sizing and iteration.
     final List<SAMSequenceRecord> records = header.getSequenceDictionary().getSequences();
     // Fill a local list and publish it only when complete, so an exception while reading
     // the dictionary cannot leave a partially built list in the cache.
     final ArrayList<String> names = new ArrayList<String>(records.size());
     for (final SAMSequenceRecord rec : records) {
       names.add(rec.getSequenceName());
     }
     sequenceNames = names;
   }
   return sequenceNames;
 }
  /**
   * Initializes the output files, the optional interval and dbSNP masks, the record filter and
   * the per-library artifact counters from the given header and this tool's arguments.
   *
   * @param header header of the input; supplies the read groups and the sequence dictionary
   * @param samFile the input file (not referenced by this method)
   */
  @Override
  protected void setup(final SAMFileHeader header, final File samFile) {
    // Optional suffix appended to every output file name.
    final String suffix;
    if (FILE_EXTENSION == null) {
      suffix = "";
    } else {
      suffix = FILE_EXTENSION;
    }

    // Derive the four output files from the common OUTPUT prefix.
    preAdapterSummaryOut =
        new File(OUTPUT + SequencingArtifactMetrics.PRE_ADAPTER_SUMMARY_EXT + suffix);
    preAdapterDetailsOut =
        new File(OUTPUT + SequencingArtifactMetrics.PRE_ADAPTER_DETAILS_EXT + suffix);
    baitBiasSummaryOut =
        new File(OUTPUT + SequencingArtifactMetrics.BAIT_BIAS_SUMMARY_EXT + suffix);
    baitBiasDetailsOut =
        new File(OUTPUT + SequencingArtifactMetrics.BAIT_BIAS_DETAILS_EXT + suffix);

    // Check all four output locations before doing any further work.
    IOUtil.assertFileIsWritable(preAdapterSummaryOut);
    IOUtil.assertFileIsWritable(preAdapterDetailsOut);
    IOUtil.assertFileIsWritable(baitBiasSummaryOut);
    IOUtil.assertFileIsWritable(baitBiasDetailsOut);

    // Record the sample and library of every read group, substituting placeholders when absent.
    for (final SAMReadGroupRecord readGroup : header.getReadGroups()) {
      samples.add(getOrElse(readGroup.getSample(), UNKNOWN_SAMPLE));
      libraries.add(getOrElse(readGroup.getLibrary(), UNKNOWN_LIBRARY));
    }

    // Optional masks: each is built only when its input file was supplied.
    if (INTERVALS != null) {
      IOUtil.assertFileIsReadable(INTERVALS);
      intervalMask =
          new IntervalListReferenceSequenceMask(IntervalList.fromFile(INTERVALS).uniqued());
    }
    if (DB_SNP != null) {
      IOUtil.assertFileIsReadable(DB_SNP);
      dbSnpMask = new DbSnpBitSetUtil(DB_SNP, header.getSequenceDictionary());
    }

    // Record-level filters, combined into a single aggregate filter.
    final List<SamRecordFilter> recordFilters = new ArrayList<SamRecordFilter>();
    recordFilters.add(new FailsVendorReadQualityFilter());
    recordFilters.add(new NotPrimaryAlignmentFilter());
    recordFilters.add(new DuplicateReadFilter());
    recordFilters.add(new AlignedFilter(true)); // discard unmapped reads
    recordFilters.add(new MappingQualityFilter(MINIMUM_MAPPING_QUALITY));
    if (!INCLUDE_UNPAIRED) {
      // A configured maximum of 0 is replaced by Integer.MAX_VALUE.
      int maxInsertSize = MAXIMUM_INSERT_SIZE;
      if (maxInsertSize == 0) {
        maxInsertSize = Integer.MAX_VALUE;
      }
      recordFilters.add(new InsertSizeFilter(MINIMUM_INSERT_SIZE, maxInsertSize));
    }
    recordFilter = new AggregateFilter(recordFilters);

    // One artifact counter per library; every counter shares the comma-joined sample alias.
    final String sampleAlias = StringUtil.join(",", new ArrayList<String>(samples));
    for (final String lib : libraries) {
      final ArtifactCounter counter =
          new ArtifactCounter(sampleAlias, lib, CONTEXT_SIZE, TANDEM_READS);
      artifactCounters.put(lib, counter);
    }
  }
  /**
   * Assembles the pipeline: loads the reads, applies MarkDuplicates, joins the reads with
   * reference and variant context, applies the recalibration stubs and writes the resulting
   * reads to {@code output}.
   *
   * @param pipeline pipeline to which the sources and transforms are added
   */
  @Override
  protected void setupPipeline(Pipeline pipeline) {
    // Reads source and its header.
    final ReadsDataflowSource readSource = new ReadsDataflowSource(bam, pipeline);
    final SAMFileHeader samHeader = readSource.getHeader();

    // Use the specified intervals when there are any; otherwise take all intervals for the
    // reference described by the header's sequence dictionary.
    final List<SimpleInterval> intervals;
    if (intervalArgumentCollection.intervalsSpecified()) {
      intervals = intervalArgumentCollection.getIntervals(samHeader.getSequenceDictionary());
    } else {
      intervals = IntervalUtils.getAllIntervalsForReference(samHeader.getSequenceDictionary());
    }

    final PCollectionView<SAMFileHeader> headerView =
        ReadsDataflowSource.getHeaderView(pipeline, samHeader);
    final PCollection<GATKRead> rawReads = readSource.getReadPCollection(intervals);

    // MarkDuplicates yields the updated reads used by every later step.
    final PCollection<GATKRead> dedupedReads = rawReads.apply(new MarkDuplicates(headerView));

    // Known variants and reference metadata, joined onto the reads.
    final VariantsDataflowSource knownVariants =
        new VariantsDataflowSource(baseRecalibrationKnownVariants, pipeline);
    final RefAPIMetadata referenceMetadata =
        new RefAPIMetadata(
            referenceName,
            RefAPISource.buildReferenceNameToIdTable(pipeline.getOptions(), referenceName));
    final PCollection<KV<GATKRead, ReadContextData>> contextualReads =
        AddContextDataToRead.add(dedupedReads, referenceMetadata, knownVariants);

    // BQSR: compute the recalibration tables, expose them as a singleton view, then apply them
    // to the duplicate-marked reads.
    final PCollection<RecalibrationTables> recalibrationTables =
        contextualReads.apply(new BaseRecalibratorStub(headerView));
    final PCollectionView<RecalibrationTables> recalibrationView =
        recalibrationTables.apply(View.<RecalibrationTables>asSingleton());
    final PCollection<GATKRead> recalibratedReads =
        dedupedReads.apply(new ApplyBQSRStub(headerView, recalibrationView));

    SmallBamWriter.writeToFile(pipeline, recalibratedReads, samHeader, output);
  }
 /**
  * Creates a new header and adds one sequence record, "chr20" with length 62435964, to its
  * sequence dictionary.
  *
  * @return the newly created header
  */
 private SAMFileHeader makeHeader() {
   final SAMFileHeader result = new SAMFileHeader();
   result.getSequenceDictionary().addSequence(new SAMSequenceRecord("chr20", 62435964));
   return result;
 }
// Example no. 6
// 0
 /**
  * Creates the artificial SAM header and the {@code GenomeLocParser} built from that header's
  * sequence dictionary.
  */
 @BeforeClass
 public void init() {
   final SAMFileHeader artificialHeader =
       ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000000);
   header = artificialHeader;
   genomeLocParser = new GenomeLocParser(artificialHeader.getSequenceDictionary());
 }
 /**
  * Creates the artificial SAM header, the {@code GenomeLocParser} built from its sequence
  * dictionary, and the location obtained from {@code createGenomeLoc("chr1", 1)}.
  */
 @BeforeClass
 public void beforeClass() {
   final SAMFileHeader artificialHeader =
       ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
   header = artificialHeader;

   final GenomeLocParser parser = new GenomeLocParser(artificialHeader.getSequenceDictionary());
   genomeLocParser = parser;
   loc = parser.createGenomeLoc("chr1", 1);
 }