Beispiel #1
0
  /**
   * Verifies that a {@link VCFHeader} is unchanged by a serialization round trip.
   *
   * <p>The header of a test VCF is read, passed through {@code TestUtil.serializeAndDeserialize},
   * and then every accessor of the deserialized copy is compared against the original.
   *
   * @throws Exception if reading the VCF or the serialization round trip fails
   */
  @Test
  public void testVCFHeaderSerialization() throws Exception {
    final VCFFileReader reader =
        new VCFFileReader(new File("testdata/htsjdk/variant/HiSeq.10000.vcf"), false);
    final VCFHeader originalHeader;
    try {
      originalHeader = reader.getFileHeader();
    } finally {
      // Release the underlying file even if fetching the header throws.
      reader.close();
    }

    final VCFHeader deserializedHeader = TestUtil.serializeAndDeserialize(originalHeader);

    // Header line collections.
    Assert.assertEquals(
        deserializedHeader.getMetaDataInInputOrder(),
        originalHeader.getMetaDataInInputOrder(),
        "Header metadata does not match before/after serialization");
    Assert.assertEquals(
        deserializedHeader.getContigLines(),
        originalHeader.getContigLines(),
        "Contig header lines do not match before/after serialization");
    Assert.assertEquals(
        deserializedHeader.getFilterLines(),
        originalHeader.getFilterLines(),
        "Filter header lines do not match before/after serialization");
    Assert.assertEquals(
        deserializedHeader.getFormatHeaderLines(),
        originalHeader.getFormatHeaderLines(),
        "Format header lines do not match before/after serialization");
    Assert.assertEquals(
        deserializedHeader.getIDHeaderLines(),
        originalHeader.getIDHeaderLines(),
        "ID header lines do not match before/after serialization");
    Assert.assertEquals(
        deserializedHeader.getInfoHeaderLines(),
        originalHeader.getInfoHeaderLines(),
        "Info header lines do not match before/after serialization");
    Assert.assertEquals(
        deserializedHeader.getOtherHeaderLines(),
        originalHeader.getOtherHeaderLines(),
        "Other header lines do not match before/after serialization");

    // Sample-related state.
    Assert.assertEquals(
        deserializedHeader.getGenotypeSamples(),
        originalHeader.getGenotypeSamples(),
        "Genotype samples not the same before/after serialization");
    Assert.assertEquals(
        deserializedHeader.samplesWereAlreadySorted(),
        originalHeader.samplesWereAlreadySorted(),
        "Sortedness of samples not the same before/after serialization");
    Assert.assertEquals(
        deserializedHeader.getSampleNamesInOrder(),
        originalHeader.getSampleNamesInOrder(),
        "Sorted list of sample names in header not the same before/after serialization");
    Assert.assertEquals(
        deserializedHeader.getSampleNameToOffset(),
        originalHeader.getSampleNameToOffset(),
        "Sample name to offset map not the same before/after serialization");

    // Final catch-all comparison of the textual form.
    Assert.assertEquals(
        deserializedHeader.toString(),
        originalHeader.toString(),
        "String representation of header not the same before/after serialization");
  }
 /**
  * Runs MarkDuplicates on a single SAM file and checks the number of optical duplicates it
  * reports.
  *
  * @param sam Input SAM file passed to MarkDuplicates.
  * @param expectedNumOpticalDuplicates Number of optical duplicates MarkDuplicates is expected to
  *     report for {@code sam}.
  */
 @Test(dataProvider = "testOpticalDuplicateDetectionDataProvider")
 public void testOpticalDuplicateDetection(
     final File sam, final long expectedNumOpticalDuplicates) {
   final File outputDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp");
   outputDir.deleteOnExit();
   try {
     final File outputSam = new File(outputDir, TEST_BASE_NAME + ".sam");
     outputSam.deleteOnExit();
     final File metricsFile = new File(outputDir, TEST_BASE_NAME + ".duplicate_metrics");
     metricsFile.deleteOnExit();
     // Run MarkDuplicates on the single input file, configuring it by setting its fields directly
     // and calling doWork() instead of going through the command-line parser.
     final MarkDuplicates markDuplicates = new MarkDuplicates();
     markDuplicates.setupOpticalDuplicateFinder();
     markDuplicates.INPUT = CollectionUtil.makeList(sam.getAbsolutePath());
     markDuplicates.OUTPUT = outputSam;
     markDuplicates.METRICS_FILE = metricsFile;
     markDuplicates.TMP_DIR = CollectionUtil.makeList(outputDir);
     // Needed to suppress calling CommandLineProgram.getVersion(), which doesn't work for code not
     // in a jar
     markDuplicates.PROGRAM_RECORD_ID = null;
     Assert.assertEquals(markDuplicates.doWork(), 0);
     Assert.assertEquals(markDuplicates.numOpticalDuplicates(), expectedNumOpticalDuplicates);
   } finally {
     // Clean up the temp directory even when an assertion above fails.
     TestUtil.recursiveDelete(outputDir);
   }
 }
  /**
   * Test that PG header records are created & chained appropriately (or not created), and that the
   * PG record chains are as expected. MarkDuplicates is used both to merge and to mark dupes in
   * this case.
   *
   * @param suppressPg If true, do not create PG header record.
   * @param expectedPnVnByReadName For each read, info about the expected chain of PG records.
   */
  @Test(dataProvider = "pgRecordChainingTest")
  public void pgRecordChainingTest(
      final boolean suppressPg, final Map<String, List<ExpectedPnAndVn>> expectedPnVnByReadName) {
    final File outputDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp");
    outputDir.deleteOnExit();
    try {
      // Run MarkDuplicates, merging the 3 input files, and either enabling or suppressing PG header
      // record creation according to suppressPg.
      final MarkDuplicates markDuplicates = new MarkDuplicates();
      final List<String> args = new ArrayList<String>();
      for (int i = 1; i <= 3; ++i) {
        args.add("INPUT=" + new File(TEST_DATA_DIR, "merge" + i + ".sam").getAbsolutePath());
      }
      final File outputSam = new File(outputDir, TEST_BASE_NAME + ".sam");
      args.add("OUTPUT=" + outputSam.getAbsolutePath());
      args.add(
          "METRICS_FILE="
              + new File(outputDir, TEST_BASE_NAME + ".duplicate_metrics").getAbsolutePath());
      if (suppressPg) {
        args.add("PROGRAM_RECORD_ID=null");
      }

      // I generally prefer to call doWork rather than invoking the argument parser, but it is
      // necessary in this case to initialize the command line.
      // Note that for the unit test, version won't come through because it is obtained through jar
      // manifest, and unit test doesn't run code from a jar.
      Assert.assertEquals(markDuplicates.instanceMain(args.toArray(new String[args.size()])), 0);

      // Read the MarkDuplicates output file, and get the PG ID for each read.  In this particular
      // test, the PG ID should be the same for both ends of a pair.
      final Map<String, String> pgIdForReadName = new HashMap<String, String>();
      final SAMFileHeader header;
      final SamReader reader = SamReaderFactory.makeDefault().open(outputSam);
      try {
        for (final SAMRecord rec : reader) {
          final String existingPgId = pgIdForReadName.get(rec.getReadName());
          final String thisPgId = rec.getStringAttribute(SAMTag.PG.name());
          if (existingPgId != null) {
            Assert.assertEquals(thisPgId, existingPgId);
          } else {
            pgIdForReadName.put(rec.getReadName(), thisPgId);
          }
        }
        header = reader.getFileHeader();
      } finally {
        // Close the reader even if an assertion in the loop fails, so the output file is not
        // still open when the temp directory is deleted below.
        CloserUtil.close(reader);
      }

      // Confirm that for each read name, the chain of PG records contains exactly the number that
      // is expected, and that values in the PG chain are as expected.
      for (final Map.Entry<String, List<ExpectedPnAndVn>> entry :
          expectedPnVnByReadName.entrySet()) {
        final String readName = entry.getKey();
        final List<ExpectedPnAndVn> expectedList = entry.getValue();
        String pgId = pgIdForReadName.get(readName);
        for (final ExpectedPnAndVn expected : expectedList) {
          final SAMProgramRecord programRecord = header.getProgramRecord(pgId);
          // Fail with context rather than a bare NullPointerException when the chain is shorter
          // than expected or the read has no PG record at all.
          Assert.assertNotNull(
              programRecord, "No PG record with ID " + pgId + " in chain for read " + readName);
          if (expected.expectedPn != null) {
            Assert.assertEquals(programRecord.getProgramName(), expected.expectedPn);
          }
          if (expected.expectedVn != null) {
            Assert.assertEquals(programRecord.getProgramVersion(), expected.expectedVn);
          }
          // Walk one step back along the PG chain.
          pgId = programRecord.getPreviousProgramGroupId();
        }
        // The chain must end exactly where the expected list ends.
        Assert.assertNull(pgId);
      }

    } finally {
      TestUtil.recursiveDelete(outputDir);
    }
  }