/**
 * Builds an artificial SAM file reader that serves a caller-supplied header and a
 * fixed set of reads instead of parsing a real file.
 *
 * @param customHeader Header that calls to getFileHeader() on this reader should return; its
 *                     sequence dictionary is also used to build this reader's GenomeLocParser.
 * @param reads        Reads that act as the backing data source.
 */
public ArtificialSAMFileReader(SAMFileHeader customHeader, SAMRecord... reads) {
    // The superclass is given an empty input stream; the data comes from 'reads' instead.
    super(createEmptyInputStream(), true);

    this.customHeader = customHeader;
    genomeLocParser = new GenomeLocParser(customHeader.getSequenceDictionary());

    // Arrays.asList returns a fixed-size list view over the varargs array (no copy is made).
    this.reads = Arrays.asList(reads);
}
/**
 * Returns the names of all sequences in the sequence dictionary of the header
 * obtained from {@code getFileHeader()}, in the order the dictionary lists them.
 * The result is computed on the first successful call and cached in
 * {@code sequenceNames}; later calls return the cached list.
 *
 * @return the sequence names (empty if the dictionary has no sequences), or
 *         {@code null} if {@code getFileHeader()} returns {@code null}
 */
public List<String> getSequenceNames() {
    if (sequenceNames == null) {
        final SAMFileHeader header = getFileHeader();
        if (header == null) {
            // Kept for backward compatibility: callers may rely on null meaning "no header".
            return null;
        }

        // Fetch the records once, and before touching the cache field, so a failure
        // here cannot leave an empty list cached.
        final List<SAMSequenceRecord> records = header.getSequenceDictionary().getSequences();

        sequenceNames = new ArrayList<String>(records.size());
        for (final SAMSequenceRecord rec : records) {
            sequenceNames.add(rec.getSequenceName());
        }
    }
    return sequenceNames;
}
@Override protected void setup(final SAMFileHeader header, final File samFile) { final String outext = (null != FILE_EXTENSION) ? FILE_EXTENSION : ""; // Add a file extension if desired preAdapterSummaryOut = new File(OUTPUT + SequencingArtifactMetrics.PRE_ADAPTER_SUMMARY_EXT + outext); preAdapterDetailsOut = new File(OUTPUT + SequencingArtifactMetrics.PRE_ADAPTER_DETAILS_EXT + outext); baitBiasSummaryOut = new File(OUTPUT + SequencingArtifactMetrics.BAIT_BIAS_SUMMARY_EXT + outext); baitBiasDetailsOut = new File(OUTPUT + SequencingArtifactMetrics.BAIT_BIAS_DETAILS_EXT + outext); IOUtil.assertFileIsWritable(preAdapterSummaryOut); IOUtil.assertFileIsWritable(preAdapterDetailsOut); IOUtil.assertFileIsWritable(baitBiasSummaryOut); IOUtil.assertFileIsWritable(baitBiasDetailsOut); for (final SAMReadGroupRecord rec : header.getReadGroups()) { samples.add(getOrElse(rec.getSample(), UNKNOWN_SAMPLE)); libraries.add(getOrElse(rec.getLibrary(), UNKNOWN_LIBRARY)); } if (INTERVALS != null) { IOUtil.assertFileIsReadable(INTERVALS); intervalMask = new IntervalListReferenceSequenceMask(IntervalList.fromFile(INTERVALS).uniqued()); } if (DB_SNP != null) { IOUtil.assertFileIsReadable(DB_SNP); dbSnpMask = new DbSnpBitSetUtil(DB_SNP, header.getSequenceDictionary()); } // set record-level filters final List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>(); filters.add(new FailsVendorReadQualityFilter()); filters.add(new NotPrimaryAlignmentFilter()); filters.add(new DuplicateReadFilter()); filters.add(new AlignedFilter(true)); // discard unmapped reads filters.add(new MappingQualityFilter(MINIMUM_MAPPING_QUALITY)); if (!INCLUDE_UNPAIRED) { final int effectiveMaxInsertSize = (MAXIMUM_INSERT_SIZE == 0) ? 
Integer.MAX_VALUE : MAXIMUM_INSERT_SIZE; filters.add(new InsertSizeFilter(MINIMUM_INSERT_SIZE, effectiveMaxInsertSize)); } recordFilter = new AggregateFilter(filters); // set up the artifact counters final String sampleAlias = StringUtil.join(",", new ArrayList<String>(samples)); for (final String library : libraries) { artifactCounters.put( library, new ArtifactCounter(sampleAlias, library, CONTEXT_SIZE, TANDEM_READS)); } }
@Override protected void setupPipeline(Pipeline pipeline) { // Load the reads. final ReadsDataflowSource readsDataflowSource = new ReadsDataflowSource(bam, pipeline); final SAMFileHeader readsHeader = readsDataflowSource.getHeader(); final List<SimpleInterval> intervals = intervalArgumentCollection.intervalsSpecified() ? intervalArgumentCollection.getIntervals(readsHeader.getSequenceDictionary()) : IntervalUtils.getAllIntervalsForReference(readsHeader.getSequenceDictionary()); final PCollectionView<SAMFileHeader> headerSingleton = ReadsDataflowSource.getHeaderView(pipeline, readsHeader); final PCollection<GATKRead> initialReads = readsDataflowSource.getReadPCollection(intervals); // Apply MarkDuplicates to produce updated GATKReads. final PCollection<GATKRead> markedReads = initialReads.apply(new MarkDuplicates(headerSingleton)); // Load the Variants and the Reference and join them to reads. final VariantsDataflowSource variantsDataflowSource = new VariantsDataflowSource(baseRecalibrationKnownVariants, pipeline); Map<String, String> referenceNameToIdTable = RefAPISource.buildReferenceNameToIdTable(pipeline.getOptions(), referenceName); RefAPIMetadata refAPIMetadata = new RefAPIMetadata(referenceName, referenceNameToIdTable); final PCollection<KV<GATKRead, ReadContextData>> readsWithContext = AddContextDataToRead.add(markedReads, refAPIMetadata, variantsDataflowSource); // Apply BQSR. final PCollection<RecalibrationTables> recalibrationReports = readsWithContext.apply(new BaseRecalibratorStub(headerSingleton)); final PCollectionView<RecalibrationTables> mergedRecalibrationReport = recalibrationReports.apply(View.<RecalibrationTables>asSingleton()); final PCollection<GATKRead> finalReads = markedReads.apply(new ApplyBQSRStub(headerSingleton, mergedRecalibrationReport)); SmallBamWriter.writeToFile(pipeline, finalReads, readsHeader, output); }
/**
 * Creates a new SAM file header and adds one sequence record named "chr20" to the
 * sequence dictionary obtained from it.
 *
 * @return the newly created header
 */
private SAMFileHeader makeHeader() {
    final SAMFileHeader result = new SAMFileHeader();
    // NOTE(review): 62435964 is presumably the sequence length of chr20 - confirm against
    // the SAMSequenceRecord constructor.
    final SAMSequenceRecord chr20 = new SAMSequenceRecord("chr20", 62435964);
    result.getSequenceDictionary().addSequence(chr20);
    return result;
}
/**
 * Class-level test fixture: creates an artificial SAM header and a GenomeLocParser
 * built from that header's sequence dictionary.
 */
@BeforeClass
public void init() {
    // NOTE(review): the meaning of the (1, 1, 1000000) arguments is defined by
    // ArtificialReadUtils.createArtificialSamHeader - confirm there before changing them.
    this.header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000000);
    this.genomeLocParser = new GenomeLocParser(this.header.getSequenceDictionary());
}
/**
 * Class-level test fixture: creates an artificial SAM header, a GenomeLocParser built
 * from that header's sequence dictionary, and a GenomeLoc created for "chr1" at 1.
 */
@BeforeClass
public void beforeClass() {
    // NOTE(review): the meaning of the (1, 1, 1000) arguments is defined by
    // ArtificialSAMUtils.createArtificialSamHeader - confirm there before changing them.
    this.header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
    this.genomeLocParser = new GenomeLocParser(this.header.getSequenceDictionary());
    this.loc = this.genomeLocParser.createGenomeLoc("chr1", 1);
}