/**
 * Verifies the minimum-mergeable-sequence predicates of {@link SharedVertexSequenceSplitter}.
 *
 * <p>Despite its name (shared with the {@code CompleteCycleData} overload), this test only
 * exercises the {@code meetsMinMergableSequenceFor*} queries. It builds a graph in which a
 * single top vertex fans out to every middle vertex, and every middle vertex feeds a single
 * bottom vertex, then checks the prefix, suffix and either-prefix-or-suffix answers.
 *
 * @param mids sequences of the middle vertices handed to the splitter
 * @param minSeqLength threshold passed to each {@code meetsMinMergableSequence*} query
 * @param prefixMeets expected answer of the prefix query
 * @param suffixMeets expected answer of the suffix query
 */
@Test(dataProvider = "MeetsMinSequenceData")
public void testSplitterCompleteCycle(
    final List<String> mids,
    final int minSeqLength,
    final boolean prefixMeets,
    final boolean suffixMeets) {
  final SeqGraph graph = new SeqGraph(11);

  // Vertices are created in the same order as before: top, bottom, then the middles.
  final SeqVertex source = new SeqVertex("AAAAAAAA");
  final SeqVertex sink = new SeqVertex("GGGGGGGG");
  final List<SeqVertex> middles = new ArrayList<>();
  for (final String bases : mids) {
    middles.add(new SeqVertex(bases));
  }

  final SeqVertex[] middleArray = middles.toArray(new SeqVertex[middles.size()]);
  graph.addVertices(middleArray);
  graph.addVertices(source, sink);

  // Wire source -> middle -> sink for every middle vertex.
  for (final SeqVertex middle : middles) {
    graph.addEdge(source, middle);
    graph.addEdge(middle, sink);
  }

  final SharedVertexSequenceSplitter splitter = new SharedVertexSequenceSplitter(graph, middles);

  final boolean prefixResult = splitter.meetsMinMergableSequenceForPrefix(minSeqLength);
  Assert.assertEquals(prefixResult, prefixMeets, "Prefix failed");

  final boolean suffixResult = splitter.meetsMinMergableSequenceForSuffix(minSeqLength);
  Assert.assertEquals(suffixResult, suffixMeets, "Suffix failed");

  final boolean eitherResult =
      splitter.meetsMinMergableSequenceForEitherPrefixOrSuffix(minSeqLength);
  final boolean eitherExpected = prefixMeets || suffixMeets;
  Assert.assertEquals(eitherResult, eitherExpected, "Either prefix or suffix failed");
}
/**
 * Runs {@code split()} on a set of otherwise unconnected vertices and checks the shape of the
 * resulting split graph.
 *
 * <p>The expected prefix and suffix are taken from the first input string using the supplied
 * lengths. The test then checks that the prefix vertex has no incoming edges, the suffix vertex
 * has no outgoing edges, neither has more neighbours than there are input strings, and every
 * new middle vertex is linked from the prefix vertex and to the suffix vertex.
 *
 * @param strings sequences of the vertices to split
 * @param expectedPrefixLen number of leading bases expected in the shared prefix
 * @param expectedSuffixLen number of trailing bases expected in the shared suffix
 */
@Test(dataProvider = "PrefixSuffixData")
public void testSplitter(
    final List<String> strings, int expectedPrefixLen, int expectedSuffixLen) {
  final SeqGraph graph = new SeqGraph(11);

  final List<SeqVertex> vertices = new ArrayList<>();
  for (final String bases : strings) {
    vertices.add(new SeqVertex(bases));
  }
  graph.addVertices(vertices.toArray(new SeqVertex[vertices.size()]));

  // Every input shares the prefix/suffix, so the first string serves as the reference.
  final String reference = strings.get(0);
  final String expectedPrefix = reference.substring(0, expectedPrefixLen);
  final String expectedSuffix = reference.substring(reference.length() - expectedSuffixLen);

  final SharedVertexSequenceSplitter splitter = new SharedVertexSequenceSplitter(graph, vertices);
  splitter.split();

  Assert.assertEquals(splitter.prefixV.getSequenceString(), expectedPrefix);
  Assert.assertEquals(splitter.suffixV.getSequenceString(), expectedSuffix);

  final int inputCount = strings.size();

  // The prefix vertex is a pure source of the split graph.
  Assert.assertTrue(splitter.splitGraph.outDegreeOf(splitter.prefixV) <= inputCount);
  Assert.assertEquals(splitter.splitGraph.inDegreeOf(splitter.prefixV), 0);

  // The suffix vertex is a pure sink of the split graph.
  Assert.assertTrue(splitter.splitGraph.inDegreeOf(splitter.suffixV) <= inputCount);
  Assert.assertEquals(splitter.splitGraph.outDegreeOf(splitter.suffixV), 0);

  for (final SeqVertex middle : splitter.newMiddles) {
    Assert.assertNotNull(splitter.splitGraph.getEdge(splitter.prefixV, middle));
    Assert.assertNotNull(splitter.splitGraph.getEdge(middle, splitter.suffixV));
  }
}
/**
 * Supplies the rows for the {@code "MeetsMinSequenceData"} data provider.
 *
 * <p>Each row is {@code {middle sequences, minSeqLength, expected prefix answer, expected
 * suffix answer}}. For the identical pair {@code "AC"/"AC"} at thresholds 1 and 2, the expected
 * answers depend on {@code SharedVertexSequenceSplitter.prefersPrefixMerging()}: the prefix
 * answer equals that flag and the suffix answer is its negation.
 *
 * @return the test rows, in a fixed order
 */
@DataProvider(name = "MeetsMinSequenceData")
public Object[][] makeMeetsMinSequenceData() {
  final boolean prefixBiased = SharedVertexSequenceSplitter.prefersPrefixMerging();
  final boolean suffixBiased = !prefixBiased;

  return new Object[][] {
    // Identical sequences.
    {Arrays.asList("AC", "AC"), 0, true, true},
    {Arrays.asList("AC", "AC"), 1, prefixBiased, suffixBiased},
    {Arrays.asList("AC", "AC"), 2, prefixBiased, suffixBiased},
    {Arrays.asList("AC", "AC"), 3, false, false},

    // One sequence is a strict prefix of the other.
    {Arrays.asList("A", "AC"), 1, true, false},
    {Arrays.asList("A", "AC"), 2, false, false},

    // Shared prefix only.
    {Arrays.asList("AT", "AC"), 1, true, false},
    {Arrays.asList("AAT", "AAC"), 1, true, false},
    {Arrays.asList("AAT", "AAC"), 2, true, false},
    {Arrays.asList("AAT", "AAC"), 3, false, false},

    // Shared prefix and shared suffix of different lengths.
    {Arrays.asList("AATCCC", "AACCCC"), 1, true, true},
    {Arrays.asList("AATCCC", "AACCCC"), 2, true, true},
    {Arrays.asList("AATCCC", "AACCCC"), 3, false, true},
    {Arrays.asList("AATCCC", "AACCCC"), 4, false, false},
  };
}
/**
 * Checks {@code SharedVertexSequenceSplitter.commonPrefixAndSuffixOfVertices} against the
 * expected shared prefix and suffix of the input sequences.
 *
 * <p>The expected prefix and suffix are cut from the first input string using the supplied
 * lengths. The first element of the returned pair is compared with the prefix and the second
 * with the suffix, and each vertex's {@code isEmpty()} must agree with whether the expected
 * string is empty.
 *
 * @param strings sequences of the vertices to analyse
 * @param expectedPrefixLen number of leading bases expected in the shared prefix
 * @param expectedSuffixLen number of trailing bases expected in the shared suffix
 */
@Test(dataProvider = "PrefixSuffixData")
public void testPrefixSuffixVertices(
    final List<String> strings, int expectedPrefixLen, int expectedSuffixLen) {
  final List<SeqVertex> v = new ArrayList<>();
  for (final String s : strings) {
    v.add(new SeqVertex(s));
  }

  final String first = strings.get(0);
  final String expectedPrefix = first.substring(0, expectedPrefixLen);
  final String expectedSuffix = first.substring(first.length() - expectedSuffixLen);

  final Pair<SeqVertex, SeqVertex> result =
      SharedVertexSequenceSplitter.commonPrefixAndSuffixOfVertices(v);

  // This message used to read "Failed suffix test", which misreported prefix failures.
  Assert.assertEquals(
      result.getFirst().getSequenceString(), expectedPrefix, "Failed prefix test");
  Assert.assertEquals(
      result.getSecond().getSequenceString(), expectedSuffix, "Failed suffix test");

  Assert.assertEquals(result.getFirst().isEmpty(), expectedPrefix.isEmpty());
  Assert.assertEquals(result.getSecond().isEmpty(), expectedSuffix.isEmpty());
}
/**
 * Runs split + update end to end and checks that the set of haplotypes in the graph is
 * unchanged.
 *
 * <p>The middle vertices are optionally connected from a shared top vertex and/or to a shared
 * bottom vertex. Edges get strictly increasing weights, and only edges touching the first
 * middle vertex are created with the {@code true} flag. Haplotypes are enumerated from a clone
 * of the graph before splitting and from the graph itself after {@code updateGraph}. Every
 * post-split haplotype must already exist, and the sorted unique haplotype lists must match.
 *
 * @param strings sequences of the middle vertices
 * @param hasTop whether a shared upstream vertex is attached to every middle vertex
 * @param hasBot whether a shared downstream vertex is attached to every middle vertex
 */
@Test(dataProvider = "CompleteCycleData")
public void testSplitterCompleteCycle(
    final List<String> strings, final boolean hasTop, final boolean hasBot) {
  final SeqGraph graph = new SeqGraph(11);
  int nextWeight = 1;

  // Vertices are created in the same order as before: top, bottom, then the middles.
  final SeqVertex top = hasTop ? new SeqVertex("AAAAAAAA") : null;
  final SeqVertex bot = hasBot ? new SeqVertex("GGGGGGGG") : null;
  final List<SeqVertex> middles = new ArrayList<>();
  for (final String bases : strings) {
    middles.add(new SeqVertex(bases));
  }
  graph.addVertices(middles.toArray(new SeqVertex[middles.size()]));

  final SeqVertex firstMiddle = middles.get(0);

  if (hasTop) {
    graph.addVertex(top);
    for (final SeqVertex middle : middles) {
      final boolean touchesFirst = middle == firstMiddle;
      graph.addEdge(top, middle, new BaseEdge(touchesFirst, nextWeight));
      nextWeight++;
    }
  }

  if (hasBot) {
    graph.addVertex(bot);
    for (final SeqVertex middle : middles) {
      final boolean touchesFirst = middle == firstMiddle;
      graph.addEdge(middle, bot, new BaseEdge(touchesFirst, nextWeight));
      nextWeight++;
    }
  }

  // Record the haplotypes of the untouched graph, using a clone so splitting cannot alter them.
  final Set<String> haplotypes = new HashSet<>();
  final KBestHaplotypeFinder originalPaths =
      new KBestHaplotypeFinder((SeqGraph) graph.clone(), graph.getSources(), graph.getSinks());
  for (final KBestHaplotype path : originalPaths) {
    haplotypes.add(new String(path.bases()));
  }

  final SharedVertexSequenceSplitter splitter = new SharedVertexSequenceSplitter(graph, middles);
  splitter.split();

  // Used both for the debug file names and for the final assertion message.
  final String caseLabel = Utils.join("_", strings) + "_" + hasTop + "_" + hasBot;

  if (PRINT_GRAPHS) {
    graph.printGraph(new File(caseLabel + ".original.dot"), 0);
  }
  if (PRINT_GRAPHS) {
    splitter.splitGraph.printGraph(new File(caseLabel + ".split.dot"), 0);
  }

  splitter.updateGraph(top, bot);

  if (PRINT_GRAPHS) {
    graph.printGraph(new File(caseLabel + ".updated.dot"), 0);
  }

  final KBestHaplotypeFinder splitPaths =
      new KBestHaplotypeFinder(graph, graph.getSources(), graph.getSinks());
  for (final KBestHaplotype path : splitPaths) {
    final String haplotype = new String(path.bases());
    Assert.assertTrue(haplotypes.contains(haplotype), "Failed to find haplotype " + haplotype);
  }

  final List<byte[]> sortedOriginalPaths = sortedUniqueBases(originalPaths);
  final List<byte[]> sortedSplitPaths = sortedUniqueBases(splitPaths);

  Assert.assertEquals(sortedSplitPaths, sortedOriginalPaths, caseLabel);
}

/** Collects the bases of the finder's unique haplotypes, sorted with the bases comparator. */
private static List<byte[]> sortedUniqueBases(final KBestHaplotypeFinder finder) {
  final List<byte[]> bases = new ArrayList<>(finder.size());
  for (final KBestHaplotype kbh : finder.unique()) {
    bases.add(kbh.bases());
  }
  Collections.sort(bases, BaseUtils.BASES_COMPARATOR);
  return bases;
}