private Collection<Throwable> execute_bam(String source) throws IOException { SamReader in = null; SAMRecordIterator iter = null; try { SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT); if (source == null) { in = srf.open(SamInputResource.of(stdin())); } else { in = srf.open(SamInputResource.of(source)); } iter = in.iterator(); bindings.put("header", in.getFileHeader()); bindings.put("iter", iter); bindings.put("format", "sam"); this.script.eval(bindings); return RETURN_OK; } catch (Exception e) { LOG.error(e); return wrapException(e); } finally { CloserUtil.close(in); CloserUtil.close(iter); bindings.remove("header"); bindings.remove("iter"); bindings.remove("format"); } }
@Exec public void exec() throws IOException, CommandArgumentException { if (filename == null) { throw new CommandArgumentException("You must specify an input BAM filename!"); } SamReaderFactory readerFactory = SamReaderFactory.makeDefault(); if (lenient) { readerFactory.validationStringency(ValidationStringency.LENIENT); } else if (silent) { readerFactory.validationStringency(ValidationStringency.SILENT); } SamReader reader = null; String name; FileChannel channel = null; if (filename.equals("-")) { reader = readerFactory.open(SamInputResource.of(System.in)); name = "<stdin>"; } else { File f = new File(filename); FileInputStream fis = new FileInputStream(f); channel = fis.getChannel(); reader = readerFactory.open(SamInputResource.of(fis)); name = f.getName(); } Iterator<SAMRecord> it = ProgressUtils.getIterator( name, reader.iterator(), (channel == null) ? null : new FileChannelStats(channel), new ProgressMessage<SAMRecord>() { long i = 0; @Override public String msg(SAMRecord current) { i++; return i + " " + current.getReadName(); } }, new CloseableFinalizer<SAMRecord>()); long i = 0; while (it.hasNext()) { i++; it.next(); } reader.close(); System.err.println("Successfully read: " + i + " records."); }
public CloseableIterator<PicardAlignment> iterator() { try { if (reader == null) { InputStream is = HttpUtils.getInstance().openConnectionStream(url); reader = new SAMFileReader(new BufferedInputStream(is)); } return new WrappedIterator(reader.iterator()); } catch (IOException e) { log.error("Error creating iterator", e); throw new RuntimeException(e); } }
public CloseableIterator<PicardAlignment> query( String sequence, int start, int end, boolean contained) { try { if (reader == null) { reader = getSamReader(locator, true); } CloseableIterator<SAMRecord> iter = reader.query(sequence, start + 1, end, contained); return new WrappedIterator(iter); } catch (IOException e) { log.error("Error opening SAM reader", e); throw new RuntimeException("Error opening SAM reader", e); } }
@Override public int doWork(String[] args) { boolean compressed = false; int maxRecordsInRAM = 100000; long count = -1L; File fileout = null; com.github.lindenb.jvarkit.util.cli.GetOpt opt = new com.github.lindenb.jvarkit.util.cli.GetOpt(); int c; while ((c = opt.getopt(args, getGetOptDefault() + "o:n:N:T:b")) != -1) { switch (c) { case 'b': compressed = true; break; case 'N': maxRecordsInRAM = Integer.parseInt(opt.getOptArg()); break; case 'n': count = Long.parseLong(opt.getOptArg()); break; case 'o': fileout = new File(opt.getOptArg()); break; case 'T': this.addTmpDirectory(new File(opt.getOptArg())); break; default: { switch (handleOtherOptions(c, opt, null)) { case EXIT_FAILURE: return -1; case EXIT_SUCCESS: return 0; default: break; } } } } if (count < -1L) // -1 == infinite { error("Bad count:" + count); return -1; } SamReader samReader = null; SAMRecordIterator iter = null; SAMFileWriter samWriter = null; Random random = new Random(); CloseableIterator<RandSamRecord> iter2 = null; try { SamFileReaderFactory.setDefaultValidationStringency(ValidationStringency.SILENT); if (opt.getOptInd() == args.length) { info("Reading from stdin"); samReader = SamFileReaderFactory.mewInstance().openStdin(); } else if (opt.getOptInd() + 1 == args.length) { File filename = new File(args[opt.getOptInd()]); info("Reading from " + filename); samReader = SamFileReaderFactory.mewInstance().open(filename); } else { error("Illegal number of arguments."); return -1; } SAMFileHeader header = samReader.getFileHeader(); header = header.clone(); header.setSortOrder(SortOrder.unsorted); header.addComment("Processed with " + getProgramName() + " : " + getProgramCommandLine()); SAMFileWriterFactory sfw = new SAMFileWriterFactory(); sfw.setCreateIndex(false); sfw.setCreateMd5File(false); if (fileout == null) { if (compressed) { samWriter = sfw.makeBAMWriter(header, true, System.out); } else { samWriter = sfw.makeSAMWriter(header, true, System.out); } } else { samWriter = sfw.makeSAMOrBAMWriter(header, true, fileout); this.addTmpDirectory(fileout); } iter = samReader.iterator(); SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(samReader.getFileHeader().getSequenceDictionary()); SortingCollection<RandSamRecord> sorter = SortingCollection.newInstance( RandSamRecord.class, new RandSamRecordCodec(header), new RandSamRecordComparator(), maxRecordsInRAM, getTmpDirectories()); sorter.setDestructiveIteration(true); while (iter.hasNext()) { RandSamRecord r = new RandSamRecord(); r.rand_index = random.nextInt(); r.samRecord = progress.watch(iter.next()); sorter.add(r); } iter.close(); iter = null; sorter.doneAdding(); iter2 = sorter.iterator(); if (count == -1) { while (iter2.hasNext()) { samWriter.addAlignment(iter2.next().samRecord); } } else { while (iter2.hasNext() && count > 0) { samWriter.addAlignment(iter2.next().samRecord); count--; } } iter2.close(); iter2 = null; sorter.cleanup(); progress.finish(); } catch (Exception e) { error(e); return -1; } finally { CloserUtil.close(iter); CloserUtil.close(iter2); CloserUtil.close(samReader); CloserUtil.close(samWriter); } return 0; }
@Override public int doWork(String[] args) { String region = null; com.github.lindenb.jvarkit.util.cli.GetOpt opt = new com.github.lindenb.jvarkit.util.cli.GetOpt(); int c; while ((c = opt.getopt(args, getGetOptDefault() + "r:R:")) != -1) { switch (c) { case 'r': region = opt.getOptArg(); break; case 'R': this.referenceFile = new File(opt.getOptArg()); break; default: { switch (handleOtherOptions(c, opt, args)) { case EXIT_FAILURE: return -1; case EXIT_SUCCESS: return 0; default: break; } } } } File faidx = null; File bamFile = null; if (opt.getOptInd() + 1 == args.length) { bamFile = new File(args[opt.getOptInd()]); } else if (opt.getOptInd() + 2 == args.length) { bamFile = new File(args[opt.getOptInd()]); faidx = new File(args[opt.getOptInd() + 1]); } else { System.err.println("Illegal Number of arguments."); return -1; } IndexedFastaSequenceFile ref = null; PrintWriter out = new PrintWriter(System.out); SamReader samReader = null; try { info("opening " + bamFile); SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT); samReader = srf.open(bamFile); Interval interval = IntervalUtils.parseOne(samReader.getFileHeader().getSequenceDictionary(), region); if (interval == null) { error("Bad interval " + interval); return -1; } if (faidx != null) { ref = new IndexedFastaSequenceFile(faidx); } TViewHandler handler = new AsciiHandler(); new TView().execute(samReader, ref, interval, handler); } catch (Exception e) { e.printStackTrace(); return -1; } finally { out.flush(); CloserUtil.close(samReader); CloserUtil.close(ref); } return 0; }
public SAMFileHeader getFileHeader() { if (header == null) { header = reader.getFileHeader(); } return header; }
public void close() throws IOException { if (reader != null) { reader.close(); } }
/** Combines multiple SAM/BAM files into one. */ @Override protected int doWork() { boolean matchedSortOrders = true; // read interval list if it is defined final List<Interval> intervalList = (INTERVALS == null ? null : IntervalList.fromFile(INTERVALS).uniqued().getIntervals()); // map reader->iterator used if INTERVALS is defined final Map<SamReader, CloseableIterator<SAMRecord>> samReaderToIterator = new HashMap<SamReader, CloseableIterator<SAMRecord>>(INPUT.size()); // Open the files for reading and writing final List<SamReader> readers = new ArrayList<SamReader>(); final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(); { SAMSequenceDictionary dict = null; // Used to try and reduce redundant SDs in memory for (final File inFile : INPUT) { IOUtil.assertFileIsReadable(inFile); final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile); if (INTERVALS != null) { if (!in.hasIndex()) throw new PicardException( "Merging with interval but Bam file is not indexed " + inFile); final CloseableIterator<SAMRecord> samIterator = new SamRecordIntervalIteratorFactory() .makeSamRecordIntervalIterator(in, intervalList, true); samReaderToIterator.put(in, samIterator); } readers.add(in); headers.add(in.getFileHeader()); // A slightly hackish attempt to keep memory consumption down when merging multiple files // with // large sequence dictionaries (10,000s of sequences). If the dictionaries are identical, // then // replace the duplicate copies with a single dictionary to reduce the memory footprint. if (dict == null) { dict = in.getFileHeader().getSequenceDictionary(); } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) { in.getFileHeader().setSequenceDictionary(dict); } matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER; } } // If all the input sort orders match the output sort order then just merge them and // write on the fly, otherwise setup to merge and sort before writing out the final file IOUtil.assertFileIsWritable(OUTPUT); final boolean presorted; final SAMFileHeader.SortOrder headerMergerSortOrder; final boolean mergingSamRecordIteratorAssumeSorted; if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED || INTERVALS != null) { log.info( "Input files are in same order as output so sorting to temp directory is not needed."); headerMergerSortOrder = SORT_ORDER; mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED; presorted = true; } else { log.info("Sorting input files using temp directory " + TMP_DIR); headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted; mergingSamRecordIteratorAssumeSorted = false; presorted = false; } final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES); final MergingSamRecordIterator iterator; // no interval defined, get an iterator for the whole bam if (intervalList == null) { iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted); } else { // show warning related to https://github.com/broadinstitute/picard/pull/314/files log.info( "Warning: merged bams from different interval lists may contain the same read in both files"); iterator = new MergingSamRecordIterator(headerMerger, samReaderToIterator, true); } final SAMFileHeader header = headerMerger.getMergedHeader(); for (final String comment : COMMENT) { header.addComment(comment); } header.setSortOrder(SORT_ORDER); final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory(); if (USE_THREADING) { samFileWriterFactory.setUseAsyncIo(true); } final SAMFileWriter out = samFileWriterFactory.makeSAMOrBAMWriter(header, presorted, OUTPUT); // Lastly loop through and write out the records final ProgressLogger progress = new ProgressLogger(log, PROGRESS_INTERVAL); while (iterator.hasNext()) { final SAMRecord record = iterator.next(); out.addAlignment(record); progress.record(record); } log.info("Finished reading inputs."); for (final CloseableIterator<SAMRecord> iter : samReaderToIterator.values()) CloserUtil.close(iter); CloserUtil.close(readers); out.close(); return 0; }
/** * Test that PG header records are created & chained appropriately (or not created), and that the * PG record chains are as expected. MarkDuplicates is used both to merge and to mark dupes in * this case. * * @param suppressPg If true, do not create PG header record. * @param expectedPnVnByReadName For each read, info about the expect chain of PG records. */ @Test(dataProvider = "pgRecordChainingTest") public void pgRecordChainingTest( final boolean suppressPg, final Map<String, List<ExpectedPnAndVn>> expectedPnVnByReadName) { final File outputDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp"); outputDir.deleteOnExit(); try { // Run MarkDuplicates, merging the 3 input files, and either enabling or suppressing PG header // record creation according to suppressPg. final MarkDuplicates markDuplicates = new MarkDuplicates(); final ArrayList<String> args = new ArrayList<String>(); for (int i = 1; i <= 3; ++i) { args.add("INPUT=" + new File(TEST_DATA_DIR, "merge" + i + ".sam").getAbsolutePath()); } final File outputSam = new File(outputDir, TEST_BASE_NAME + ".sam"); args.add("OUTPUT=" + outputSam.getAbsolutePath()); args.add( "METRICS_FILE=" + new File(outputDir, TEST_BASE_NAME + ".duplicate_metrics").getAbsolutePath()); if (suppressPg) args.add("PROGRAM_RECORD_ID=null"); // I generally prefer to call doWork rather than invoking the argument parser, but it is // necessary // in this case to initialize the command line. // Note that for the unit test, version won't come through because it is obtained through jar // manifest, and unit test doesn't run code from a jar. Assert.assertEquals(markDuplicates.instanceMain(args.toArray(new String[args.size()])), 0); // Read the MarkDuplicates output file, and get the PG ID for each read. In this particular // test, // the PG ID should be the same for both ends of a pair. final SamReader reader = SamReaderFactory.makeDefault().open(outputSam); final Map<String, String> pgIdForReadName = new HashMap<String, String>(); for (final SAMRecord rec : reader) { final String existingPgId = pgIdForReadName.get(rec.getReadName()); final String thisPgId = rec.getStringAttribute(SAMTag.PG.name()); if (existingPgId != null) { Assert.assertEquals(thisPgId, existingPgId); } else { pgIdForReadName.put(rec.getReadName(), thisPgId); } } final SAMFileHeader header = reader.getFileHeader(); CloserUtil.close(reader); // Confirm that for each read name, the chain of PG records contains exactly the number that // is expected, // and that values in the PG chain are as expected. for (final Map.Entry<String, List<ExpectedPnAndVn>> entry : expectedPnVnByReadName.entrySet()) { final String readName = entry.getKey(); final List<ExpectedPnAndVn> expectedList = entry.getValue(); String pgId = pgIdForReadName.get(readName); for (final ExpectedPnAndVn expected : expectedList) { final SAMProgramRecord programRecord = header.getProgramRecord(pgId); if (expected.expectedPn != null) Assert.assertEquals(programRecord.getProgramName(), expected.expectedPn); if (expected.expectedVn != null) Assert.assertEquals(programRecord.getProgramVersion(), expected.expectedVn); pgId = programRecord.getPreviousProgramGroupId(); } Assert.assertNull(pgId); } } finally { TestUtil.recursiveDelete(outputDir); } }
private BAMProcessorImplDict(File in) { reader = SamReaderFactory.makeDefault().open(in); iterator = reader.iterator(); queue = new LinkedList<SAMRecord>(); }