/** * Test the parsing of sequence variations from Last MAF file. * * @throws IOException */ @Test public void testLastToCompactVariations() throws IOException { // test LastToCompact convert+filtering // read fake data and convert+filter final LastToCompactMode processor = new LastToCompactMode(); final int LAST_TO_COMPACT_M_PARAM = 2; processor.setAmbiguityThreshold(LAST_TO_COMPACT_M_PARAM); processor.setInputFile("test-results/alignments/last-to-compact/last-103-variations.maf"); processor.setOutputFile("test-results/alignments/last-to-compact/last-103-variations"); processor.setTargetReferenceIdsFilename( "test-results/alignments/last-to-compact/last-reference.compact-reads"); processor.setOnlyMafFile(true); processor.setNumberOfReads(2857822); processor.setLargestQueryIndex(2857822); processor.setSmallestQueryIndex(0); processor.setPropagateQueryIds(false); processor.setPropagateTargetIds(true); processor.setQualityFilterParameters( "threshold=1"); // allow everything to pass through, important to detect all variations in // this test. processor.execute(); // read compact alignment results final AlignmentReaderImpl reader = new AlignmentReaderImpl(processor.getOutputFile()); reader.readHeader(); assertEquals(2857822, reader.getNumberOfQueries()); assertEquals(1, reader.getNumberOfTargets()); int entryIndex = 0; while (reader.hasNext()) { final Alignments.AlignmentEntry alignmentEntry = reader.next(); System.out.println(entryIndex + " entry : " + alignmentEntry); switch (entryIndex) { case 0: assertEquals(2, alignmentEntry.getSequenceVariationsCount()); final Alignments.SequenceVariation var1 = alignmentEntry.getSequenceVariations(0); assertEquals("C", var1.getTo()); assertEquals("G", var1.getFrom()); assertEquals(1, var1.getPosition()); final Alignments.SequenceVariation var2 = alignmentEntry.getSequenceVariations(1); assertEquals("C", var2.getTo()); assertEquals("A", var2.getFrom()); assertEquals(11, var2.getPosition()); break; case 1: assertEquals(1, alignmentEntry.getSequenceVariationsCount()); final Alignments.SequenceVariation var1_0 = alignmentEntry.getSequenceVariations(0); assertEquals("---", var1_0.getTo()); assertEquals("TTT", var1_0.getFrom()); assertEquals(3, var1_0.getPosition()); break; case 2: assertEquals(2, alignmentEntry.getSequenceVariationsCount()); final Alignments.SequenceVariation var2_1 = alignmentEntry.getSequenceVariations(0); assertEquals("A", var2_1.getTo()); assertEquals("G", var2_1.getFrom()); assertEquals(2, var2_1.getPosition()); final Alignments.SequenceVariation var2_2 = alignmentEntry.getSequenceVariations(1); assertEquals("A", var2_2.getTo()); assertEquals("-", var2_2.getFrom()); assertEquals(29, var2_2.getPosition()); break; default: break; } entryIndex++; } assertEquals(4, entryIndex); }
@Test public void testLastToCompact1() throws IOException { // test LastToCompact convert+filtering // read fake data and convert+filter final LastToCompactMode processor = new LastToCompactMode(); final int LAST_TO_COMPACT_M_PARAM = 2; processor.setAmbiguityThreshold(LAST_TO_COMPACT_M_PARAM); processor.setInputFile("test-results/alignments/last-to-compact/last-101.maf"); processor.setOutputFile("test-results/alignments/last-to-compact/last-101.compact"); processor.setTargetReferenceIdsFilename( "test-results/alignments/last-to-compact/last-reference.compact-reads"); processor.setOnlyMafFile(true); processor.setNumberOfReads(2857819); processor.setLargestQueryIndex(2857819); processor.setSmallestQueryIndex(0); processor.setPropagateQueryIds(false); processor.setPropagateTargetIds(true); processor.execute(); // read compact alignment results final AlignmentReaderImpl reader = new AlignmentReaderImpl(processor.getOutputFile()); reader.readHeader(); assertEquals(2857819, reader.getNumberOfQueries()); assertEquals(1, reader.getNumberOfTargets()); assertTrue(reader.hasQueryIndexOccurrences()); // lookup tables final Int2IntOpenHashMap queryIndex2NumberOfHits = new Int2IntOpenHashMap(); final Int2FloatOpenHashMap queryIndex2Score = new Int2FloatOpenHashMap(); final Int2IntOpenHashMap queryIndex2Multiplicity = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2NumberOfIndels = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2NumberOfMismatches = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2Position = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2QueryAlignedLength = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2QueryPosition = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2TargetIndex = new Int2IntOpenHashMap(); final Int2BooleanOpenHashMap queryIndex2MatchingReverseStrand = new Int2BooleanOpenHashMap(); final Int2IntOpenHashMap queryIndex2TargetAlignedLength = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2QueryIndexOcc = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2Ambiguity = new Int2IntOpenHashMap(); // enter alignment data int qii; while (reader.hasNext()) { final Alignments.AlignmentEntry aln = reader.next(); qii = aln.getQueryIndex(); final int numHits = queryIndex2NumberOfHits.get(qii); queryIndex2NumberOfHits.put(qii, numHits + 1); queryIndex2Score.put(qii, aln.getScore()); queryIndex2Multiplicity.put(qii, aln.getMultiplicity()); queryIndex2NumberOfIndels.put(qii, aln.getNumberOfIndels()); queryIndex2NumberOfMismatches.put(qii, aln.getNumberOfMismatches()); queryIndex2Position.put(qii, aln.getPosition()); queryIndex2QueryAlignedLength.put(qii, aln.getQueryAlignedLength()); queryIndex2QueryPosition.put(qii, aln.getQueryPosition()); queryIndex2TargetIndex.put(qii, aln.getTargetIndex()); queryIndex2MatchingReverseStrand.put(qii, aln.getMatchingReverseStrand()); queryIndex2TargetAlignedLength.put(qii, aln.getTargetAlignedLength()); queryIndex2QueryIndexOcc.put(qii, aln.getQueryIndexOccurrences()); queryIndex2Ambiguity.put(qii, aln.getAmbiguity()); } // // validate alignment data using values from getMafInput() below // // there are a total of 5 entries with ID 2857818 // 2 entries have score = 35 (the maximum score for this ID) // 3 entries are filtered b/c their scores are below 35 qii = 2857818; assertEquals(queryIndex2NumberOfHits.get(qii), 2); assertEquals((int) queryIndex2Score.get(qii), 35); assertEquals(queryIndex2Multiplicity.get(qii), 1); assertEquals(queryIndex2NumberOfIndels.get(qii), 0); assertEquals(queryIndex2NumberOfMismatches.get(qii), 0); assertEquals(queryIndex2Position.get(qii), 1614); // last entry added assertEquals(queryIndex2QueryAlignedLength.get(qii), 35); assertEquals(queryIndex2QueryPosition.get(qii), 0); assertEquals(queryIndex2TargetIndex.get(qii), 0); assertEquals(queryIndex2MatchingReverseStrand.get(qii), false); assertEquals(queryIndex2TargetAlignedLength.get(qii), 35); assertEquals(2, queryIndex2QueryIndexOcc.get(qii)); assertEquals(2, queryIndex2Ambiguity.get(qii)); // there are 5 entries with the score = 35 (the maximum score for this ID) // filtered due to ambiguity qii = 577287; assertEquals(queryIndex2NumberOfHits.get(qii), 0); assertEquals((int) queryIndex2Score.get(qii), 0); assertEquals(queryIndex2Multiplicity.get(qii), 0); assertEquals(queryIndex2NumberOfIndels.get(qii), 0); assertEquals(queryIndex2NumberOfMismatches.get(qii), 0); assertEquals(queryIndex2Position.get(qii), 0); assertEquals(queryIndex2QueryAlignedLength.get(qii), 0); assertEquals(queryIndex2QueryPosition.get(qii), 0); assertEquals(queryIndex2TargetIndex.get(qii), 0); assertEquals(queryIndex2MatchingReverseStrand.get(qii), false); assertEquals(queryIndex2TargetAlignedLength.get(qii), 0); assertEquals(0, queryIndex2QueryIndexOcc.get(qii)); assertEquals(0, queryIndex2Ambiguity.get(qii)); // reader.close(); }