/**
 * Checks that a {@code VCFHeader} read from {@code HiSeq.10000.vcf} is unchanged after a round
 * trip through {@code TestUtil.serializeAndDeserialize}. Each accessor is compared separately so
 * that a failure names the part of the header that did not survive serialization.
 *
 * @throws Exception if reading the VCF or the serialization round trip fails
 */
@Test
public void testVCFHeaderSerialization() throws Exception {
    final VCFFileReader reader =
            new VCFFileReader(new File("testdata/htsjdk/variant/HiSeq.10000.vcf"), false);
    final VCFHeader originalHeader;
    try {
        originalHeader = reader.getFileHeader();
    } finally {
        // Close the reader even when reading the header fails, so the file handle is not leaked.
        reader.close();
    }

    final VCFHeader deserializedHeader = TestUtil.serializeAndDeserialize(originalHeader);

    // Header line collections.
    Assert.assertEquals(
            deserializedHeader.getMetaDataInInputOrder(),
            originalHeader.getMetaDataInInputOrder(),
            "Header metadata does not match before/after serialization");
    Assert.assertEquals(
            deserializedHeader.getContigLines(),
            originalHeader.getContigLines(),
            "Contig header lines do not match before/after serialization");
    Assert.assertEquals(
            deserializedHeader.getFilterLines(),
            originalHeader.getFilterLines(),
            "Filter header lines do not match before/after serialization");
    Assert.assertEquals(
            deserializedHeader.getFormatHeaderLines(),
            originalHeader.getFormatHeaderLines(),
            "Format header lines do not match before/after serialization");
    Assert.assertEquals(
            deserializedHeader.getIDHeaderLines(),
            originalHeader.getIDHeaderLines(),
            "ID header lines do not match before/after serialization");
    Assert.assertEquals(
            deserializedHeader.getInfoHeaderLines(),
            originalHeader.getInfoHeaderLines(),
            "Info header lines do not match before/after serialization");
    Assert.assertEquals(
            deserializedHeader.getOtherHeaderLines(),
            originalHeader.getOtherHeaderLines(),
            "Other header lines do not match before/after serialization");

    // Sample-related state.
    Assert.assertEquals(
            deserializedHeader.getGenotypeSamples(),
            originalHeader.getGenotypeSamples(),
            "Genotype samples not the same before/after serialization");
    Assert.assertEquals(
            deserializedHeader.samplesWereAlreadySorted(),
            originalHeader.samplesWereAlreadySorted(),
            "Sortedness of samples not the same before/after serialization");
    Assert.assertEquals(
            deserializedHeader.getSampleNamesInOrder(),
            originalHeader.getSampleNamesInOrder(),
            "Sorted list of sample names in header not the same before/after serialization");
    Assert.assertEquals(
            deserializedHeader.getSampleNameToOffset(),
            originalHeader.getSampleNameToOffset(),
            "Sample name to offset map not the same before/after serialization");

    // Overall textual form.
    Assert.assertEquals(
            deserializedHeader.toString(),
            originalHeader.toString(),
            "String representation of header not the same before/after serialization");
}
@Test(dataProvider = "testOpticalDuplicateDetectionDataProvider") public void testOpticalDuplicateDetection( final File sam, final long expectedNumOpticalDuplicates) { final File outputDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp"); outputDir.deleteOnExit(); final File outputSam = new File(outputDir, TEST_BASE_NAME + ".sam"); outputSam.deleteOnExit(); final File metricsFile = new File(outputDir, TEST_BASE_NAME + ".duplicate_metrics"); metricsFile.deleteOnExit(); // Run MarkDuplicates, merging the 3 input files, and either enabling or suppressing PG header // record creation according to suppressPg. final MarkDuplicates markDuplicates = new MarkDuplicates(); markDuplicates.setupOpticalDuplicateFinder(); markDuplicates.INPUT = CollectionUtil.makeList(sam.getAbsolutePath()); markDuplicates.OUTPUT = outputSam; markDuplicates.METRICS_FILE = metricsFile; markDuplicates.TMP_DIR = CollectionUtil.makeList(outputDir); // Needed to suppress calling CommandLineProgram.getVersion(), which doesn't work for code not // in a jar markDuplicates.PROGRAM_RECORD_ID = null; Assert.assertEquals(markDuplicates.doWork(), 0); Assert.assertEquals(markDuplicates.numOpticalDuplicates(), expectedNumOpticalDuplicates); TestUtil.recursiveDelete(outputDir); }
/** * Test that PG header records are created & chained appropriately (or not created), and that the * PG record chains are as expected. MarkDuplicates is used both to merge and to mark dupes in * this case. * * @param suppressPg If true, do not create PG header record. * @param expectedPnVnByReadName For each read, info about the expect chain of PG records. */ @Test(dataProvider = "pgRecordChainingTest") public void pgRecordChainingTest( final boolean suppressPg, final Map<String, List<ExpectedPnAndVn>> expectedPnVnByReadName) { final File outputDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp"); outputDir.deleteOnExit(); try { // Run MarkDuplicates, merging the 3 input files, and either enabling or suppressing PG header // record creation according to suppressPg. final MarkDuplicates markDuplicates = new MarkDuplicates(); final ArrayList<String> args = new ArrayList<String>(); for (int i = 1; i <= 3; ++i) { args.add("INPUT=" + new File(TEST_DATA_DIR, "merge" + i + ".sam").getAbsolutePath()); } final File outputSam = new File(outputDir, TEST_BASE_NAME + ".sam"); args.add("OUTPUT=" + outputSam.getAbsolutePath()); args.add( "METRICS_FILE=" + new File(outputDir, TEST_BASE_NAME + ".duplicate_metrics").getAbsolutePath()); if (suppressPg) args.add("PROGRAM_RECORD_ID=null"); // I generally prefer to call doWork rather than invoking the argument parser, but it is // necessary // in this case to initialize the command line. // Note that for the unit test, version won't come through because it is obtained through jar // manifest, and unit test doesn't run code from a jar. Assert.assertEquals(markDuplicates.instanceMain(args.toArray(new String[args.size()])), 0); // Read the MarkDuplicates output file, and get the PG ID for each read. In this particular // test, // the PG ID should be the same for both ends of a pair. 
final SamReader reader = SamReaderFactory.makeDefault().open(outputSam); final Map<String, String> pgIdForReadName = new HashMap<String, String>(); for (final SAMRecord rec : reader) { final String existingPgId = pgIdForReadName.get(rec.getReadName()); final String thisPgId = rec.getStringAttribute(SAMTag.PG.name()); if (existingPgId != null) { Assert.assertEquals(thisPgId, existingPgId); } else { pgIdForReadName.put(rec.getReadName(), thisPgId); } } final SAMFileHeader header = reader.getFileHeader(); CloserUtil.close(reader); // Confirm that for each read name, the chain of PG records contains exactly the number that // is expected, // and that values in the PG chain are as expected. for (final Map.Entry<String, List<ExpectedPnAndVn>> entry : expectedPnVnByReadName.entrySet()) { final String readName = entry.getKey(); final List<ExpectedPnAndVn> expectedList = entry.getValue(); String pgId = pgIdForReadName.get(readName); for (final ExpectedPnAndVn expected : expectedList) { final SAMProgramRecord programRecord = header.getProgramRecord(pgId); if (expected.expectedPn != null) Assert.assertEquals(programRecord.getProgramName(), expected.expectedPn); if (expected.expectedVn != null) Assert.assertEquals(programRecord.getProgramVersion(), expected.expectedVn); pgId = programRecord.getPreviousProgramGroupId(); } Assert.assertNull(pgId); } } finally { TestUtil.recursiveDelete(outputDir); } }