Esempio n. 1
0
 @Test(dataProvider = "testOpticalDuplicateDetectionDataProvider")
 public void testOpticalDuplicateDetection(
     final File sam, final long expectedNumOpticalDuplicates) {
   final File outputDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp");
   outputDir.deleteOnExit();
   final File outputSam = new File(outputDir, TEST_BASE_NAME + ".sam");
   outputSam.deleteOnExit();
   final File metricsFile = new File(outputDir, TEST_BASE_NAME + ".duplicate_metrics");
   metricsFile.deleteOnExit();
   // Run MarkDuplicates, merging the 3 input files, and either enabling or suppressing PG header
   // record creation according to suppressPg.
   final MarkDuplicates markDuplicates = new MarkDuplicates();
   markDuplicates.setupOpticalDuplicateFinder();
   markDuplicates.INPUT = CollectionUtil.makeList(sam.getAbsolutePath());
   markDuplicates.OUTPUT = outputSam;
   markDuplicates.METRICS_FILE = metricsFile;
   markDuplicates.TMP_DIR = CollectionUtil.makeList(outputDir);
   // Needed to suppress calling CommandLineProgram.getVersion(), which doesn't work for code not
   // in a jar
   markDuplicates.PROGRAM_RECORD_ID = null;
   Assert.assertEquals(markDuplicates.doWork(), 0);
   Assert.assertEquals(markDuplicates.numOpticalDuplicates(), expectedNumOpticalDuplicates);
   TestUtil.recursiveDelete(outputDir);
 }
Esempio n. 2
0
  /**
   * Test that PG header records are created & chained appropriately (or not created), and that the
   * PG record chains are as expected. MarkDuplicates is used both to merge and to mark dupes in
   * this case.
   *
   * @param suppressPg If true, do not create PG header record.
   * @param expectedPnVnByReadName For each read, info about the expect chain of PG records.
   */
  @Test(dataProvider = "pgRecordChainingTest")
  public void pgRecordChainingTest(
      final boolean suppressPg, final Map<String, List<ExpectedPnAndVn>> expectedPnVnByReadName) {
    final File outputDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp");
    outputDir.deleteOnExit();
    try {
      // Run MarkDuplicates, merging the 3 input files, and either enabling or suppressing PG header
      // record creation according to suppressPg.
      final MarkDuplicates markDuplicates = new MarkDuplicates();
      final ArrayList<String> args = new ArrayList<String>();
      for (int i = 1; i <= 3; ++i) {
        args.add("INPUT=" + new File(TEST_DATA_DIR, "merge" + i + ".sam").getAbsolutePath());
      }
      final File outputSam = new File(outputDir, TEST_BASE_NAME + ".sam");
      args.add("OUTPUT=" + outputSam.getAbsolutePath());
      args.add(
          "METRICS_FILE="
              + new File(outputDir, TEST_BASE_NAME + ".duplicate_metrics").getAbsolutePath());
      if (suppressPg) args.add("PROGRAM_RECORD_ID=null");

      // I generally prefer to call doWork rather than invoking the argument parser, but it is
      // necessary
      // in this case to initialize the command line.
      // Note that for the unit test, version won't come through because it is obtained through jar
      // manifest, and unit test doesn't run code from a jar.
      Assert.assertEquals(markDuplicates.instanceMain(args.toArray(new String[args.size()])), 0);

      // Read the MarkDuplicates output file, and get the PG ID for each read.  In this particular
      // test,
      // the PG ID should be the same for both ends of a pair.
      final SamReader reader = SamReaderFactory.makeDefault().open(outputSam);

      final Map<String, String> pgIdForReadName = new HashMap<String, String>();
      for (final SAMRecord rec : reader) {
        final String existingPgId = pgIdForReadName.get(rec.getReadName());
        final String thisPgId = rec.getStringAttribute(SAMTag.PG.name());
        if (existingPgId != null) {
          Assert.assertEquals(thisPgId, existingPgId);
        } else {
          pgIdForReadName.put(rec.getReadName(), thisPgId);
        }
      }
      final SAMFileHeader header = reader.getFileHeader();
      CloserUtil.close(reader);

      // Confirm that for each read name, the chain of PG records contains exactly the number that
      // is expected,
      // and that values in the PG chain are as expected.
      for (final Map.Entry<String, List<ExpectedPnAndVn>> entry :
          expectedPnVnByReadName.entrySet()) {
        final String readName = entry.getKey();
        final List<ExpectedPnAndVn> expectedList = entry.getValue();
        String pgId = pgIdForReadName.get(readName);
        for (final ExpectedPnAndVn expected : expectedList) {
          final SAMProgramRecord programRecord = header.getProgramRecord(pgId);
          if (expected.expectedPn != null)
            Assert.assertEquals(programRecord.getProgramName(), expected.expectedPn);
          if (expected.expectedVn != null)
            Assert.assertEquals(programRecord.getProgramVersion(), expected.expectedVn);
          pgId = programRecord.getPreviousProgramGroupId();
        }
        Assert.assertNull(pgId);
      }

    } finally {
      TestUtil.recursiveDelete(outputDir);
    }
  }