/** * Simple single-function interface to split and then update a graph * * @param graph the graph containing the vertices in toMerge * @param v The bottom node whose incoming vertices we'd like to split * @return true if some useful splitting was done, false otherwise */ public boolean split(final SeqGraph graph, final SeqVertex v) { if (graph == null) throw new IllegalArgumentException("graph cannot be null"); if (v == null) throw new IllegalArgumentException("v cannot be null"); if (!graph.vertexSet().contains(v)) throw new IllegalArgumentException("graph doesn't contain vertex v " + v); final Collection<SeqVertex> toSplit = graph.incomingVerticesOf(v); if (toSplit.size() < 2) // Can only split at least 2 vertices return false; else if (!safeToSplit(graph, v, toSplit)) { return false; } else { final SeqVertex suffixVTemplate = commonSuffix(toSplit); if (suffixVTemplate.isEmpty()) { return false; } else if (wouldEliminateRefSource(graph, suffixVTemplate, toSplit)) { return false; } else if (allVerticesAreTheCommonSuffix(suffixVTemplate, toSplit)) { return false; } else { final List<BaseEdge> edgesToRemove = new LinkedList<BaseEdge>(); // graph.printGraph(new File("split.pre_" + v.getSequenceString() + "." 
+ // counter + ".dot"), 0); for (final SeqVertex mid : toSplit) { // create my own copy of the suffix final SeqVertex suffixV = new SeqVertex(suffixVTemplate.getSequence()); graph.addVertex(suffixV); final SeqVertex prefixV = mid.withoutSuffix(suffixV.getSequence()); final BaseEdge out = graph.outgoingEdgeOf(mid); final SeqVertex incomingTarget; if (prefixV == null) { // this node is entirely explained by suffix incomingTarget = suffixV; } else { incomingTarget = prefixV; graph.addVertex(prefixV); graph.addEdge(prefixV, suffixV, new BaseEdge(out.isRef(), 1)); edgesToRemove.add(out); } graph.addEdge(suffixV, graph.getEdgeTarget(out), out.copy()); for (final BaseEdge in : graph.incomingEdgesOf(mid)) { graph.addEdge(graph.getEdgeSource(in), incomingTarget, in.copy()); edgesToRemove.add(in); } } graph.removeAllVertices(toSplit); graph.removeAllEdges(edgesToRemove); // graph.printGraph(new File("split.post_" + v.getSequenceString() + "." + // counter++ + ".dot"), 0); return true; } } }
/**
 * Checks the splitter's minimum-mergable-sequence predicates on a graph in which every middle
 * vertex has an edge from a shared top vertex and an edge to a shared bottom vertex.
 *
 * @param mids sequences of the middle vertices
 * @param minSeqLength minimum length passed to each predicate
 * @param prefixMeets expected result of the prefix predicate
 * @param suffixMeets expected result of the suffix predicate
 */
@Test(dataProvider = "MeetsMinSequenceData")
public void testSplitterCompleteCycle(
    final List<String> mids,
    final int minSeqLength,
    final boolean prefixMeets,
    final boolean suffixMeets) {
  final SeqGraph graph = new SeqGraph(11);

  final SeqVertex top = new SeqVertex("AAAAAAAA");
  final SeqVertex bot = new SeqVertex("GGGGGGGG");
  final List<SeqVertex> middles = new ArrayList<>();
  for (final String sequence : mids) {
    middles.add(new SeqVertex(sequence));
  }

  graph.addVertices(middles.toArray(new SeqVertex[0]));
  graph.addVertices(top, bot);
  for (final SeqVertex middle : middles) {
    graph.addEdge(top, middle);
    graph.addEdge(middle, bot);
  }

  final SharedVertexSequenceSplitter splitter = new SharedVertexSequenceSplitter(graph, middles);

  final boolean prefixResult = splitter.meetsMinMergableSequenceForPrefix(minSeqLength);
  Assert.assertEquals(prefixResult, prefixMeets, "Prefix failed");

  final boolean suffixResult = splitter.meetsMinMergableSequenceForSuffix(minSeqLength);
  Assert.assertEquals(suffixResult, suffixMeets, "Suffix failed");

  final boolean eitherResult =
      splitter.meetsMinMergableSequenceForEitherPrefixOrSuffix(minSeqLength);
  final boolean eitherExpected = suffixMeets || prefixMeets;
  Assert.assertEquals(eitherResult, eitherExpected, "Either prefix or suffix failed");
}
/**
 * Verifies that splitting the shared sequence of the middle vertices and updating the graph
 * leaves the haplotypes enumerated by {@code KBestHaplotypeFinder} unchanged.
 *
 * @param strings sequences of the middle vertices
 * @param hasTop whether a shared top vertex with an edge to every middle vertex is added
 * @param hasBot whether a shared bottom vertex with an edge from every middle vertex is added
 */
@Test(dataProvider = "CompleteCycleData")
public void testSplitterCompleteCycle(
    final List<String> strings, final boolean hasTop, final boolean hasBot) {
  // Shared prefix for the debug file names and for the final assertion message.
  final String label = Utils.join("_", strings) + "_" + hasTop + "_" + hasBot;

  final SeqGraph graph = new SeqGraph(11);
  int nextWeight = 1;
  final SeqVertex top = hasTop ? new SeqVertex("AAAAAAAA") : null;
  final SeqVertex bot = hasBot ? new SeqVertex("GGGGGGGG") : null;

  final List<SeqVertex> middles = new ArrayList<>();
  for (final String sequence : strings) {
    middles.add(new SeqVertex(sequence));
  }
  graph.addVertices(middles.toArray(new SeqVertex[0]));

  // Only edges touching the first middle vertex are created with the ref flag set.
  final SeqVertex refMiddle = middles.get(0);
  if (hasTop) {
    graph.addVertex(top);
    for (final SeqVertex middle : middles) {
      graph.addEdge(top, middle, new BaseEdge(middle == refMiddle, nextWeight++));
    }
  }
  if (hasBot) {
    graph.addVertex(bot);
    for (final SeqVertex middle : middles) {
      graph.addEdge(middle, bot, new BaseEdge(middle == refMiddle, nextWeight++));
    }
  }

  // Record the haplotypes of the untouched graph; the finder is built on a clone, presumably
  // so the in-place update below cannot affect it.
  final Set<String> expectedHaplotypes = new HashSet<>();
  final KBestHaplotypeFinder originalPaths =
      new KBestHaplotypeFinder((SeqGraph) graph.clone(), graph.getSources(), graph.getSinks());
  for (final KBestHaplotype path : originalPaths) {
    expectedHaplotypes.add(new String(path.bases()));
  }

  final SharedVertexSequenceSplitter splitter = new SharedVertexSequenceSplitter(graph, middles);
  splitter.split();
  if (PRINT_GRAPHS) {
    graph.printGraph(new File(label + ".original.dot"), 0);
    splitter.splitGraph.printGraph(new File(label + ".split.dot"), 0);
  }

  splitter.updateGraph(top, bot);
  if (PRINT_GRAPHS) {
    graph.printGraph(new File(label + ".updated.dot"), 0);
  }

  // Every haplotype found after the update must already have existed before it.
  final KBestHaplotypeFinder splitPaths =
      new KBestHaplotypeFinder(graph, graph.getSources(), graph.getSinks());
  for (final KBestHaplotype path : splitPaths) {
    final String haplotype = new String(path.bases());
    Assert.assertTrue(
        expectedHaplotypes.contains(haplotype), "Failed to find haplotype " + haplotype);
  }

  // The sorted unique base sequences must match between the original and the updated graph.
  final List<byte[]> sortedOriginalPaths = sortedUniqueBases(originalPaths);
  final List<byte[]> sortedSplitPaths = sortedUniqueBases(splitPaths);
  Assert.assertEquals(sortedSplitPaths, sortedOriginalPaths, label);
}

/** Collects the bases of the finder's unique haplotypes, sorted with BASES_COMPARATOR. */
private static List<byte[]> sortedUniqueBases(final KBestHaplotypeFinder finder) {
  final List<byte[]> bases = new ArrayList<>(finder.size());
  for (final KBestHaplotype haplotype : finder.unique()) {
    bases.add(haplotype.bases());
  }
  Collections.sort(bases, BaseUtils.BASES_COMPARATOR);
  return bases;
}