/**
 * Checks whether factoring out the common suffix would eliminate the reference source vertex.
 *
 * <p>Only the first vertex of {@code toSplits} that the graph reports as a reference source is
 * examined. The check compares lengths rather than bases: provided {@code commonSuffix} really is
 * a suffix of that vertex (the caller's contract), equal lengths mean the vertex consists of the
 * suffix and nothing else.
 *
 * @param graph the graph, used to ask whether a vertex is the reference source
 * @param commonSuffix the common suffix of all vertices in toSplits
 * @param toSplits the vertices we are trying to split
 * @return true if toSplits contains a reference source whose length equals the length of
 *     commonSuffix; false if the lengths differ or toSplits holds no reference source
 */
private boolean wouldEliminateRefSource(
    final SeqGraph graph, final SeqVertex commonSuffix, final Collection<SeqVertex> toSplits) {
  for (final SeqVertex candidate : toSplits) {
    if (!graph.isRefSource(candidate)) {
      continue;
    }
    // The first reference source found decides the answer; later vertices are never inspected.
    final boolean entirelySuffix = candidate.length() == commonSuffix.length();
    return entirelySuffix;
  }
  return false;
}
/**
 * Checks whether every vertex we would split is nothing more than the common suffix itself.
 *
 * <p>For example, if the prefix nodes are ABC and ABC, splitting them would just produce ABC
 * again, and repeated splitting would loop forever. The check compares lengths only, relying on
 * {@code commonSuffix} being a suffix of each vertex.
 *
 * @param commonSuffix the common suffix of all vertices in toSplits
 * @param toSplits the collection of vertices we want to split
 * @return true if every vertex in toSplits has the same length as commonSuffix (this includes the
 *     case of an empty collection); false as soon as one vertex has a different length
 */
private boolean allVerticesAreTheCommonSuffix(
    final SeqVertex commonSuffix, final Collection<SeqVertex> toSplits) {
  boolean everyVertexIsSuffix = true;
  for (final SeqVertex candidate : toSplits) {
    if (candidate.length() != commonSuffix.length()) {
      // One vertex carrying extra bases is enough to answer "no".
      everyVertexIsSuffix = false;
      break;
    }
  }
  return everyVertexIsSuffix;
}
/** * Simple single-function interface to split and then update a graph * * @param graph the graph containing the vertices in toMerge * @param v The bottom node whose incoming vertices we'd like to split * @return true if some useful splitting was done, false otherwise */ public boolean split(final SeqGraph graph, final SeqVertex v) { if (graph == null) throw new IllegalArgumentException("graph cannot be null"); if (v == null) throw new IllegalArgumentException("v cannot be null"); if (!graph.vertexSet().contains(v)) throw new IllegalArgumentException("graph doesn't contain vertex v " + v); final Collection<SeqVertex> toSplit = graph.incomingVerticesOf(v); if (toSplit.size() < 2) // Can only split at least 2 vertices return false; else if (!safeToSplit(graph, v, toSplit)) { return false; } else { final SeqVertex suffixVTemplate = commonSuffix(toSplit); if (suffixVTemplate.isEmpty()) { return false; } else if (wouldEliminateRefSource(graph, suffixVTemplate, toSplit)) { return false; } else if (allVerticesAreTheCommonSuffix(suffixVTemplate, toSplit)) { return false; } else { final List<BaseEdge> edgesToRemove = new LinkedList<BaseEdge>(); // graph.printGraph(new File("split.pre_" + v.getSequenceString() + "." 
+ // counter + ".dot"), 0); for (final SeqVertex mid : toSplit) { // create my own copy of the suffix final SeqVertex suffixV = new SeqVertex(suffixVTemplate.getSequence()); graph.addVertex(suffixV); final SeqVertex prefixV = mid.withoutSuffix(suffixV.getSequence()); final BaseEdge out = graph.outgoingEdgeOf(mid); final SeqVertex incomingTarget; if (prefixV == null) { // this node is entirely explained by suffix incomingTarget = suffixV; } else { incomingTarget = prefixV; graph.addVertex(prefixV); graph.addEdge(prefixV, suffixV, new BaseEdge(out.isRef(), 1)); edgesToRemove.add(out); } graph.addEdge(suffixV, graph.getEdgeTarget(out), out.copy()); for (final BaseEdge in : graph.incomingEdgesOf(mid)) { graph.addEdge(graph.getEdgeSource(in), incomingTarget, in.copy()); edgesToRemove.add(in); } } graph.removeAllVertices(toSplit); graph.removeAllEdges(edgesToRemove); // graph.printGraph(new File("split.post_" + v.getSequenceString() + "." + // counter++ + ".dot"), 0); return true; } } }