@Test(dataProvider = "loadReadsADAM", groups = "spark")
public void readsSinkADAMTest(String inputBam, String outputDirectoryName) throws IOException {
    // Since the test requires that we not create the actual output directory in advance,
    // we instead create its parent directory and mark it for deletion on exit. This protects
    // us from naming collisions across multiple instances of the test suite.
    final File outputParentDirectory = createTempDir(outputDirectoryName + "_parent");
    final File outputDirectory = new File(outputParentDirectory, outputDirectoryName);

    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();

    ReadsSparkSource readSource = new ReadsSparkSource(ctx);
    JavaRDD<GATKRead> rddParallelReads = readSource.getParallelReads(inputBam, null);
    SAMFileHeader header = ReadsSparkSource.getHeader(ctx, inputBam, null);

    ReadsSparkSink.writeReads(
        ctx, outputDirectory.getAbsolutePath(), rddParallelReads, header, ReadsWriteFormat.ADAM);

    JavaRDD<GATKRead> rddParallelReads2 =
        readSource.getADAMReads(outputDirectory.getAbsolutePath(), null, header);
    Assert.assertEquals(rddParallelReads.count(), rddParallelReads2.count());

    // Test the round trip
    List<GATKRead> samList = rddParallelReads.collect();
    List<GATKRead> adamList = rddParallelReads2.collect();
    Comparator<GATKRead> comparator = new ReadCoordinateComparator(header);
    samList.sort(comparator);
    adamList.sort(comparator);
    for (int i = 0; i < samList.size(); i++) {
        SAMRecord expected = samList.get(i).convertToSAMRecord(header);
        SAMRecord observed = adamList.get(i).convertToSAMRecord(header);
        // Manually test equality of some fields, as there are issues with the round trip
        // BAM -> ADAM -> BAM; see https://github.com/bigdatagenomics/adam/issues/823
        Assert.assertEquals(observed.getReadName(), expected.getReadName(), "readname");
        Assert.assertEquals(observed.getAlignmentStart(), expected.getAlignmentStart(), "getAlignmentStart");
        Assert.assertEquals(observed.getAlignmentEnd(), expected.getAlignmentEnd(), "getAlignmentEnd");
        Assert.assertEquals(observed.getFlags(), expected.getFlags(), "getFlags");
        Assert.assertEquals(observed.getMappingQuality(), expected.getMappingQuality(), "getMappingQuality");
        Assert.assertEquals(observed.getMateAlignmentStart(), expected.getMateAlignmentStart(), "getMateAlignmentStart");
        Assert.assertEquals(observed.getCigar(), expected.getCigar(), "getCigar");
    }
}
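// A minimal sketch (not from the original source) of the shape a TestNG data provider backing
// the "loadReadsADAM" name above might take: each row supplies an input BAM path and a distinct
// output directory name. The BAM path shown is a hypothetical placeholder, not a file shipped
// with the project.
@DataProvider(name = "loadReadsADAM")
public Object[][] loadReadsADAM() {
    return new Object[][] {
        {"src/test/resources/example.bam", "readsSinkADAMTest1"}  // hypothetical input BAM
    };
}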
/**
 * HACK TO CREATE A GATKSAMRECORD BASED ONLY ON A SAMRECORD, FOR TESTING PURPOSES ONLY
 *
 * @param read the SAMRecord whose fields are copied into this GATKSAMRecord
 */
public GATKSAMRecord(final SAMRecord read) {
    super(read.getHeader());
    super.setReferenceIndex(read.getReferenceIndex());
    super.setAlignmentStart(read.getAlignmentStart());
    super.setReadName(read.getReadName());
    super.setMappingQuality(read.getMappingQuality());
    // indexing bin done below
    super.setCigar(read.getCigar());
    super.setFlags(read.getFlags());
    super.setMateReferenceIndex(read.getMateReferenceIndex());
    super.setMateAlignmentStart(read.getMateAlignmentStart());
    super.setInferredInsertSize(read.getInferredInsertSize());
    SAMReadGroupRecord samRG = read.getReadGroup();
    SAMBinaryTagAndValue samAttr = GATKBin.getReadBinaryAttributes(read);
    if (samAttr == null) {
        clearAttributes();
    } else {
        setAttributes(samAttr);
    }
    if (samRG != null) {
        GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG);
        setReadGroup(rg);
    }
    super.setFileSource(read.getFileSource());
    super.setReadName(read.getReadName());
    super.setCigarString(read.getCigarString());
    super.setReadBases(read.getReadBases());
    super.setBaseQualities(read.getBaseQualities());
    // From SAMRecord constructor: Do this after the above because setCigarString will clear it.
    GATKBin.setReadIndexingBin(this, GATKBin.getReadIndexingBin(read));
}
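// A minimal usage sketch (not from the original source, and assumed to live in a separate test
// class): read SAMRecords with htsjdk's SamReader, wrap each one via the hack constructor above,
// and spot-check that a core field survives the copy. The BAM path is a hypothetical placeholder.
@Test
public void wrapSAMRecordSketch() throws IOException {
    try (final SamReader reader =
             SamReaderFactory.makeDefault().open(new File("example.bam"))) {  // hypothetical path
        for (final SAMRecord rec : reader) {
            final GATKSAMRecord gatkRead = new GATKSAMRecord(rec);
            // The copy constructor should preserve core fields such as the read name.
            Assert.assertEquals(gatkRead.getReadName(), rec.getReadName());
        }
    }
}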