Beispiel #1
0
 private Collection<Throwable> execute_bam(String source) throws IOException {
   SamReader in = null;
   SAMRecordIterator iter = null;
   try {
     SamReaderFactory srf =
         SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
     if (source == null) {
       in = srf.open(SamInputResource.of(stdin()));
     } else {
       in = srf.open(SamInputResource.of(source));
     }
     iter = in.iterator();
     bindings.put("header", in.getFileHeader());
     bindings.put("iter", iter);
     bindings.put("format", "sam");
     this.script.eval(bindings);
     return RETURN_OK;
   } catch (Exception e) {
     LOG.error(e);
     return wrapException(e);
   } finally {
     CloserUtil.close(in);
     CloserUtil.close(iter);
     bindings.remove("header");
     bindings.remove("iter");
     bindings.remove("format");
   }
 }
Beispiel #2
0
  @Exec
  public void exec() throws IOException, CommandArgumentException {
    if (filename == null) {
      throw new CommandArgumentException("You must specify an input BAM filename!");
    }
    SamReaderFactory readerFactory = SamReaderFactory.makeDefault();
    if (lenient) {
      readerFactory.validationStringency(ValidationStringency.LENIENT);
    } else if (silent) {
      readerFactory.validationStringency(ValidationStringency.SILENT);
    }

    SamReader reader = null;
    String name;
    FileChannel channel = null;
    if (filename.equals("-")) {
      reader = readerFactory.open(SamInputResource.of(System.in));
      name = "<stdin>";
    } else {
      File f = new File(filename);
      FileInputStream fis = new FileInputStream(f);
      channel = fis.getChannel();
      reader = readerFactory.open(SamInputResource.of(fis));
      name = f.getName();
    }

    Iterator<SAMRecord> it =
        ProgressUtils.getIterator(
            name,
            reader.iterator(),
            (channel == null) ? null : new FileChannelStats(channel),
            new ProgressMessage<SAMRecord>() {
              long i = 0;

              @Override
              public String msg(SAMRecord current) {
                i++;
                return i + " " + current.getReadName();
              }
            },
            new CloseableFinalizer<SAMRecord>());
    long i = 0;
    while (it.hasNext()) {
      i++;
      it.next();
    }
    reader.close();
    System.err.println("Successfully read: " + i + " records.");
  }
Beispiel #3
0
 public CloseableIterator<PicardAlignment> iterator() {
   try {
     if (reader == null) {
       InputStream is = HttpUtils.getInstance().openConnectionStream(url);
       reader = new SAMFileReader(new BufferedInputStream(is));
     }
     return new WrappedIterator(reader.iterator());
   } catch (IOException e) {
     log.error("Error creating iterator", e);
     throw new RuntimeException(e);
   }
 }
Beispiel #4
0
 public CloseableIterator<PicardAlignment> query(
     String sequence, int start, int end, boolean contained) {
   try {
     if (reader == null) {
       reader = getSamReader(locator, true);
     }
     CloseableIterator<SAMRecord> iter = reader.query(sequence, start + 1, end, contained);
     return new WrappedIterator(iter);
   } catch (IOException e) {
     log.error("Error opening SAM reader", e);
     throw new RuntimeException("Error opening SAM reader", e);
   }
 }
Beispiel #5
0
  @Override
  public int doWork(String[] args) {
    boolean compressed = false;
    int maxRecordsInRAM = 100000;
    long count = -1L;
    File fileout = null;
    com.github.lindenb.jvarkit.util.cli.GetOpt opt =
        new com.github.lindenb.jvarkit.util.cli.GetOpt();
    int c;
    while ((c = opt.getopt(args, getGetOptDefault() + "o:n:N:T:b")) != -1) {
      switch (c) {
        case 'b':
          compressed = true;
          break;
        case 'N':
          maxRecordsInRAM = Integer.parseInt(opt.getOptArg());
          break;
        case 'n':
          count = Long.parseLong(opt.getOptArg());
          break;
        case 'o':
          fileout = new File(opt.getOptArg());
          break;
        case 'T':
          this.addTmpDirectory(new File(opt.getOptArg()));
          break;
        default:
          {
            switch (handleOtherOptions(c, opt, null)) {
              case EXIT_FAILURE:
                return -1;
              case EXIT_SUCCESS:
                return 0;
              default:
                break;
            }
          }
      }
    }
    if (count < -1L) // -1 == infinite
    {
      error("Bad count:" + count);
      return -1;
    }
    SamReader samReader = null;
    SAMRecordIterator iter = null;
    SAMFileWriter samWriter = null;
    Random random = new Random();
    CloseableIterator<RandSamRecord> iter2 = null;
    try {
      SamFileReaderFactory.setDefaultValidationStringency(ValidationStringency.SILENT);
      if (opt.getOptInd() == args.length) {
        info("Reading from stdin");
        samReader = SamFileReaderFactory.mewInstance().openStdin();
      } else if (opt.getOptInd() + 1 == args.length) {
        File filename = new File(args[opt.getOptInd()]);
        info("Reading from " + filename);
        samReader = SamFileReaderFactory.mewInstance().open(filename);
      } else {
        error("Illegal number of arguments.");
        return -1;
      }
      SAMFileHeader header = samReader.getFileHeader();

      header = header.clone();
      header.setSortOrder(SortOrder.unsorted);
      header.addComment("Processed with " + getProgramName() + " : " + getProgramCommandLine());
      SAMFileWriterFactory sfw = new SAMFileWriterFactory();
      sfw.setCreateIndex(false);
      sfw.setCreateMd5File(false);
      if (fileout == null) {
        if (compressed) {
          samWriter = sfw.makeBAMWriter(header, true, System.out);
        } else {
          samWriter = sfw.makeSAMWriter(header, true, System.out);
        }
      } else {
        samWriter = sfw.makeSAMOrBAMWriter(header, true, fileout);
        this.addTmpDirectory(fileout);
      }
      iter = samReader.iterator();
      SAMSequenceDictionaryProgress progress =
          new SAMSequenceDictionaryProgress(samReader.getFileHeader().getSequenceDictionary());

      SortingCollection<RandSamRecord> sorter =
          SortingCollection.newInstance(
              RandSamRecord.class,
              new RandSamRecordCodec(header),
              new RandSamRecordComparator(),
              maxRecordsInRAM,
              getTmpDirectories());
      sorter.setDestructiveIteration(true);
      while (iter.hasNext()) {
        RandSamRecord r = new RandSamRecord();
        r.rand_index = random.nextInt();
        r.samRecord = progress.watch(iter.next());

        sorter.add(r);
      }
      iter.close();
      iter = null;

      sorter.doneAdding();
      iter2 = sorter.iterator();
      if (count == -1) {
        while (iter2.hasNext()) {
          samWriter.addAlignment(iter2.next().samRecord);
        }
      } else {
        while (iter2.hasNext() && count > 0) {
          samWriter.addAlignment(iter2.next().samRecord);
          count--;
        }
      }
      iter2.close();
      iter2 = null;
      sorter.cleanup();
      progress.finish();
    } catch (Exception e) {
      error(e);
      return -1;
    } finally {
      CloserUtil.close(iter);
      CloserUtil.close(iter2);
      CloserUtil.close(samReader);
      CloserUtil.close(samWriter);
    }
    return 0;
  }
Beispiel #6
0
  @Override
  public int doWork(String[] args) {
    String region = null;
    com.github.lindenb.jvarkit.util.cli.GetOpt opt =
        new com.github.lindenb.jvarkit.util.cli.GetOpt();
    int c;
    while ((c = opt.getopt(args, getGetOptDefault() + "r:R:")) != -1) {
      switch (c) {
        case 'r':
          region = opt.getOptArg();
          break;
        case 'R':
          this.referenceFile = new File(opt.getOptArg());
          break;
        default:
          {
            switch (handleOtherOptions(c, opt, args)) {
              case EXIT_FAILURE:
                return -1;
              case EXIT_SUCCESS:
                return 0;
              default:
                break;
            }
          }
      }
    }

    File faidx = null;
    File bamFile = null;

    if (opt.getOptInd() + 1 == args.length) {
      bamFile = new File(args[opt.getOptInd()]);
    } else if (opt.getOptInd() + 2 == args.length) {
      bamFile = new File(args[opt.getOptInd()]);
      faidx = new File(args[opt.getOptInd() + 1]);
    } else {
      System.err.println("Illegal Number of arguments.");
      return -1;
    }

    IndexedFastaSequenceFile ref = null;
    PrintWriter out = new PrintWriter(System.out);
    SamReader samReader = null;
    try {
      info("opening " + bamFile);
      SamReaderFactory srf =
          SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
      samReader = srf.open(bamFile);

      Interval interval =
          IntervalUtils.parseOne(samReader.getFileHeader().getSequenceDictionary(), region);
      if (interval == null) {
        error("Bad interval " + interval);
        return -1;
      }
      if (faidx != null) {
        ref = new IndexedFastaSequenceFile(faidx);
      }

      TViewHandler handler = new AsciiHandler();
      new TView().execute(samReader, ref, interval, handler);
    } catch (Exception e) {
      e.printStackTrace();
      return -1;
    } finally {
      out.flush();
      CloserUtil.close(samReader);
      CloserUtil.close(ref);
    }
    return 0;
  }
Beispiel #7
0
 public SAMFileHeader getFileHeader() {
   if (header == null) {
     header = reader.getFileHeader();
   }
   return header;
 }
Beispiel #8
0
 public void close() throws IOException {
   if (reader != null) {
     reader.close();
   }
 }
Beispiel #9
0
  /** Combines multiple SAM/BAM files into one. */
  @Override
  protected int doWork() {
    boolean matchedSortOrders = true;

    // read interval list if it is defined
    final List<Interval> intervalList =
        (INTERVALS == null ? null : IntervalList.fromFile(INTERVALS).uniqued().getIntervals());
    // map reader->iterator used if INTERVALS is defined
    final Map<SamReader, CloseableIterator<SAMRecord>> samReaderToIterator =
        new HashMap<SamReader, CloseableIterator<SAMRecord>>(INPUT.size());

    // Open the files for reading and writing
    final List<SamReader> readers = new ArrayList<SamReader>();
    final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
    {
      SAMSequenceDictionary dict = null; // Used to try and reduce redundant SDs in memory

      for (final File inFile : INPUT) {
        IOUtil.assertFileIsReadable(inFile);
        final SamReader in =
            SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
        if (INTERVALS != null) {
          if (!in.hasIndex())
            throw new PicardException(
                "Merging with interval but Bam file is not indexed " + inFile);
          final CloseableIterator<SAMRecord> samIterator =
              new SamRecordIntervalIteratorFactory()
                  .makeSamRecordIntervalIterator(in, intervalList, true);
          samReaderToIterator.put(in, samIterator);
        }

        readers.add(in);
        headers.add(in.getFileHeader());

        // A slightly hackish attempt to keep memory consumption down when merging multiple files
        // with
        // large sequence dictionaries (10,000s of sequences). If the dictionaries are identical,
        // then
        // replace the duplicate copies with a single dictionary to reduce the memory footprint.
        if (dict == null) {
          dict = in.getFileHeader().getSequenceDictionary();
        } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
          in.getFileHeader().setSequenceDictionary(dict);
        }

        matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
      }
    }

    // If all the input sort orders match the output sort order then just merge them and
    // write on the fly, otherwise setup to merge and sort before writing out the final file
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean presorted;
    final SAMFileHeader.SortOrder headerMergerSortOrder;
    final boolean mergingSamRecordIteratorAssumeSorted;

    if (matchedSortOrders
        || SORT_ORDER == SAMFileHeader.SortOrder.unsorted
        || ASSUME_SORTED
        || INTERVALS != null) {
      log.info(
          "Input files are in same order as output so sorting to temp directory is not needed.");
      headerMergerSortOrder = SORT_ORDER;
      mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
      presorted = true;
    } else {
      log.info("Sorting input files using temp directory " + TMP_DIR);
      headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
      mergingSamRecordIteratorAssumeSorted = false;
      presorted = false;
    }
    final SamFileHeaderMerger headerMerger =
        new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
    final MergingSamRecordIterator iterator;
    // no interval defined, get an iterator for the whole bam
    if (intervalList == null) {
      iterator =
          new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
    } else {
      // show warning related to https://github.com/broadinstitute/picard/pull/314/files
      log.info(
          "Warning: merged bams from different interval lists may contain the same read in both files");
      iterator = new MergingSamRecordIterator(headerMerger, samReaderToIterator, true);
    }
    final SAMFileHeader header = headerMerger.getMergedHeader();
    for (final String comment : COMMENT) {
      header.addComment(comment);
    }
    header.setSortOrder(SORT_ORDER);
    final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
    if (USE_THREADING) {
      samFileWriterFactory.setUseAsyncIo(true);
    }
    final SAMFileWriter out = samFileWriterFactory.makeSAMOrBAMWriter(header, presorted, OUTPUT);

    // Lastly loop through and write out the records
    final ProgressLogger progress = new ProgressLogger(log, PROGRESS_INTERVAL);
    while (iterator.hasNext()) {
      final SAMRecord record = iterator.next();
      out.addAlignment(record);
      progress.record(record);
    }

    log.info("Finished reading inputs.");
    for (final CloseableIterator<SAMRecord> iter : samReaderToIterator.values())
      CloserUtil.close(iter);
    CloserUtil.close(readers);
    out.close();
    return 0;
  }
  /**
   * Test that PG header records are created & chained appropriately (or not created), and that the
   * PG record chains are as expected. MarkDuplicates is used both to merge and to mark dupes in
   * this case.
   *
   * @param suppressPg If true, do not create PG header record.
   * @param expectedPnVnByReadName For each read, info about the expect chain of PG records.
   */
  @Test(dataProvider = "pgRecordChainingTest")
  public void pgRecordChainingTest(
      final boolean suppressPg, final Map<String, List<ExpectedPnAndVn>> expectedPnVnByReadName) {
    final File outputDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp");
    outputDir.deleteOnExit();
    try {
      // Run MarkDuplicates, merging the 3 input files, and either enabling or suppressing PG header
      // record creation according to suppressPg.
      final MarkDuplicates markDuplicates = new MarkDuplicates();
      final ArrayList<String> args = new ArrayList<String>();
      for (int i = 1; i <= 3; ++i) {
        args.add("INPUT=" + new File(TEST_DATA_DIR, "merge" + i + ".sam").getAbsolutePath());
      }
      final File outputSam = new File(outputDir, TEST_BASE_NAME + ".sam");
      args.add("OUTPUT=" + outputSam.getAbsolutePath());
      args.add(
          "METRICS_FILE="
              + new File(outputDir, TEST_BASE_NAME + ".duplicate_metrics").getAbsolutePath());
      if (suppressPg) args.add("PROGRAM_RECORD_ID=null");

      // I generally prefer to call doWork rather than invoking the argument parser, but it is
      // necessary
      // in this case to initialize the command line.
      // Note that for the unit test, version won't come through because it is obtained through jar
      // manifest, and unit test doesn't run code from a jar.
      Assert.assertEquals(markDuplicates.instanceMain(args.toArray(new String[args.size()])), 0);

      // Read the MarkDuplicates output file, and get the PG ID for each read.  In this particular
      // test,
      // the PG ID should be the same for both ends of a pair.
      final SamReader reader = SamReaderFactory.makeDefault().open(outputSam);

      final Map<String, String> pgIdForReadName = new HashMap<String, String>();
      for (final SAMRecord rec : reader) {
        final String existingPgId = pgIdForReadName.get(rec.getReadName());
        final String thisPgId = rec.getStringAttribute(SAMTag.PG.name());
        if (existingPgId != null) {
          Assert.assertEquals(thisPgId, existingPgId);
        } else {
          pgIdForReadName.put(rec.getReadName(), thisPgId);
        }
      }
      final SAMFileHeader header = reader.getFileHeader();
      CloserUtil.close(reader);

      // Confirm that for each read name, the chain of PG records contains exactly the number that
      // is expected,
      // and that values in the PG chain are as expected.
      for (final Map.Entry<String, List<ExpectedPnAndVn>> entry :
          expectedPnVnByReadName.entrySet()) {
        final String readName = entry.getKey();
        final List<ExpectedPnAndVn> expectedList = entry.getValue();
        String pgId = pgIdForReadName.get(readName);
        for (final ExpectedPnAndVn expected : expectedList) {
          final SAMProgramRecord programRecord = header.getProgramRecord(pgId);
          if (expected.expectedPn != null)
            Assert.assertEquals(programRecord.getProgramName(), expected.expectedPn);
          if (expected.expectedVn != null)
            Assert.assertEquals(programRecord.getProgramVersion(), expected.expectedVn);
          pgId = programRecord.getPreviousProgramGroupId();
        }
        Assert.assertNull(pgId);
      }

    } finally {
      TestUtil.recursiveDelete(outputDir);
    }
  }
 private BAMProcessorImplDict(File in) {
   reader = SamReaderFactory.makeDefault().open(in);
   iterator = reader.iterator();
   queue = new LinkedList<SAMRecord>();
 }