@Test(dataProvider = "MeetsMinSequenceData")
  public void testSplitterCompleteCycle(
      final List<String> mids,
      final int minSeqLength,
      final boolean prefixMeets,
      final boolean suffixMeets) {
    // Build a graph shaped top -> {one vertex per entry of mids} -> bot, hand the middle
    // vertices to a SharedVertexSequenceSplitter, and compare its three
    // meetsMinMergableSequence* answers for minSeqLength against the expected flags.
    final SeqGraph graph = new SeqGraph(11);

    final SeqVertex source = new SeqVertex("AAAAAAAA");
    final SeqVertex sink = new SeqVertex("GGGGGGGG");

    final List<SeqVertex> middles = new ArrayList<>();
    for (final String bases : mids) {
      middles.add(new SeqVertex(bases));
    }

    // Middle vertices are registered before the two end vertices.
    graph.addVertices(middles.toArray(new SeqVertex[middles.size()]));
    graph.addVertices(source, sink);

    // Every middle vertex sits between the shared source and the shared sink.
    for (final SeqVertex middle : middles) {
      graph.addEdge(source, middle);
      graph.addEdge(middle, sink);
    }

    final SharedVertexSequenceSplitter splitter =
        new SharedVertexSequenceSplitter(graph, middles);

    Assert.assertEquals(
        splitter.meetsMinMergableSequenceForPrefix(minSeqLength),
        prefixMeets,
        "Prefix failed");
    Assert.assertEquals(
        splitter.meetsMinMergableSequenceForSuffix(minSeqLength),
        suffixMeets,
        "Suffix failed");

    // The combined check is expected to pass whenever at least one of the two ends passes.
    Assert.assertEquals(
        splitter.meetsMinMergableSequenceForEitherPrefixOrSuffix(minSeqLength),
        prefixMeets || suffixMeets,
        "Either prefix or suffix failed");
  }
  @Test(dataProvider = "PrefixSuffixData")
  public void testSplitter(
      final List<String> strings, int expectedPrefixLen, int expectedSuffixLen) {
    // Split a set of unconnected vertices (one per input string) and verify the extracted
    // prefix/suffix vertices and the wiring of the resulting split graph.
    final SeqGraph graph = new SeqGraph(11);

    final List<SeqVertex> vertices = new ArrayList<>();
    for (final String bases : strings) {
      vertices.add(new SeqVertex(bases));
    }
    graph.addVertices(vertices.toArray(new SeqVertex[vertices.size()]));

    // The expected prefix/suffix are carved out of the first input string.
    final String firstString = strings.get(0);
    final String expectedPrefix = firstString.substring(0, expectedPrefixLen);
    final String expectedSuffix =
        firstString.substring(firstString.length() - expectedSuffixLen);

    final SharedVertexSequenceSplitter splitter =
        new SharedVertexSequenceSplitter(graph, vertices);
    splitter.split();

    Assert.assertEquals(splitter.prefixV.getSequenceString(), expectedPrefix);
    Assert.assertEquals(splitter.suffixV.getSequenceString(), expectedSuffix);

    final int inputCount = strings.size();

    // The prefix vertex has no incoming edges and at most one outgoing edge per input.
    Assert.assertTrue(splitter.splitGraph.outDegreeOf(splitter.prefixV) <= inputCount);
    Assert.assertEquals(splitter.splitGraph.inDegreeOf(splitter.prefixV), 0);

    // The suffix vertex has no outgoing edges and at most one incoming edge per input.
    Assert.assertTrue(splitter.splitGraph.inDegreeOf(splitter.suffixV) <= inputCount);
    Assert.assertEquals(splitter.splitGraph.outDegreeOf(splitter.suffixV), 0);

    // Each new middle vertex must be linked prefix -> middle -> suffix.
    for (final SeqVertex middle : splitter.newMiddles) {
      Assert.assertNotNull(splitter.splitGraph.getEdge(splitter.prefixV, middle));
      Assert.assertNotNull(splitter.splitGraph.getEdge(middle, splitter.suffixV));
    }
  }
Пример #3
0
  /**
   * Decide whether the vertices in toMerge can be split without creating cycles.
   *
   * <p>Every vertex in toMerge must be distinct from bot, have exactly one outgoing edge, have bot
   * among its outgoing vertices, and must not itself be an outgoing vertex of bot.
   *
   * @param graph a graph
   * @param bot a vertex whose incoming vertices we want to split
   * @param toMerge the set of vertices we'd be splitting up
   * @return true if every vertex in toMerge passes the checks above, false otherwise
   */
  private boolean safeToSplit(
      final SeqGraph graph, final SeqVertex bot, final Collection<SeqVertex> toMerge) {
    final Set<SeqVertex> successorsOfBot = new HashSet<SeqVertex>(graph.outgoingVerticesOf(bot));

    for (final SeqVertex candidate : toMerge) {
      final int outDegree = graph.outgoingEdgesOf(candidate).size();

      if (candidate == bot) {
        // a vertex feeding into itself would be a self cycle, which we don't allow
        return false;
      }
      if (outDegree != 1) {
        // the candidate must have exactly one way out
        return false;
      }
      if (!graph.outgoingVerticesOf(candidate).contains(bot)) {
        // ... and that way out must lead to bot
        return false;
      }
      if (successorsOfBot.contains(candidate)) {
        // forbid cycles from bottom -> mid
        return false;
      }
    }

    return true;
  }
Пример #4
0
 // Recursively fill nodeDests starting from node n: each visited node is mapped to the set of
 // destination nodes of its outgoing edges. Recursion continues only into destinations whose
 // phase is the given phase and, when stopAtFFTs is set, whose fft() is null.
 private void initNodeDests(SeqPhase phase, SeqNode n) throws Xcept {
   // already recorded -> nothing to do (also terminates recursion on revisits)
   if (nodeDests.get(n) != null) {
     return;
   }
   // nodes carrying an FFT are not expanded when stopAtFFTs is set
   if (stopAtFFTs && n.fft() != null) {
     return;
   }

   LinkedHashSet<SeqNode> successors = new LinkedHashSet<SeqNode>();
   nodeDests.put(n, successors);

   for (Iterator<SeqEdge> edges = graph.getEdgesFrom(n); edges.hasNext(); ) {
     SeqNode target = edges.next().dest();
     // the destination is always recorded, even when we do not descend into it
     successors.add(target);

     if (phase == graph.getPhase(target) && !(stopAtFFTs && target.fft() != null)) {
       initNodeDests(phase, target);
     }
   }
 }
Пример #5
0
 /**
  * Would factoring out this suffix result in eliminating the reference source vertex?
  *
  * <p>Only the first vertex of toSplits that the graph reports as a reference source is
  * examined; the answer is whether its length equals the length of commonSuffix.
  *
  * @param graph the graph
  * @param commonSuffix the common suffix of all toSplits
  * @param toSplits the vertices we are trying to split
  * @return true if toSplits contains a reference source whose length equals that of
  *     commonSuffix; false if that length differs or no reference source is present
  */
 private boolean wouldEliminateRefSource(
     final SeqGraph graph, final SeqVertex commonSuffix, final Collection<SeqVertex> toSplits) {
   for (final SeqVertex candidate : toSplits) {
     if (!graph.isRefSource(candidate)) {
       continue;
     }
     // first reference source found decides the outcome
     return candidate.length() == commonSuffix.length();
   }
   return false;
 }
Пример #6
0
  // Constructor: stores the graph, the start/stop node sets and the stopAtFFTs flag, then
  // builds nodeDests by walking from every start node (using that start node's own phase).
  // Progress is traced on System.err.
  public SeqGraphXcons(
      SeqGraph graph,
      LinkedHashSet<SeqNode> startNodes,
      LinkedHashSet<SeqNode> stopNodes,
      boolean stopAtFFTs)
      throws Xcept {
    this.graph = graph;
    this.startNodes = startNodes;
    this.stopNodes = stopNodes;
    this.stopAtFFTs = stopAtFFTs;

    System.err.println("SeqGraphXCons stopAtFFTs=" + stopAtFFTs);

    nodeDests = new Hashtable<SeqNode, LinkedHashSet<SeqNode>>();
    for (SeqNode start : startNodes) {
      initNodeDests(graph.getPhase(start), start);
    }

    System.err.println("  nodeDests=" + nodeDests);
  }
Пример #7
0
  /**
   * Simple single-function interface to split and then update a graph.
   *
   * <p>The vertices with an edge into v are examined. If there are at least two of them, they
   * pass safeToSplit, and they share a non-empty common suffix that neither eliminates the
   * reference source nor equals every vertex entirely, each such vertex is replaced by an
   * optional prefix vertex followed by its own copy of the common-suffix vertex.
   *
   * @param graph the graph containing the vertices to split
   * @param v The bottom node whose incoming vertices we'd like to split
   * @return true if some useful splitting was done, false otherwise
   * @throws IllegalArgumentException if graph or v is null, or graph does not contain v
   */
  public boolean split(final SeqGraph graph, final SeqVertex v) {
    if (graph == null) {
      throw new IllegalArgumentException("graph cannot be null");
    }
    if (v == null) {
      throw new IllegalArgumentException("v cannot be null");
    }
    if (!graph.vertexSet().contains(v)) {
      throw new IllegalArgumentException("graph doesn't contain vertex v " + v);
    }

    final Collection<SeqVertex> toSplit = graph.incomingVerticesOf(v);

    // Splitting needs at least two vertices, and they must be safe to split.
    if (toSplit.size() < 2) {
      return false;
    }
    if (!safeToSplit(graph, v, toSplit)) {
      return false;
    }

    // Nothing useful can be done when there is no shared suffix, when factoring it out would
    // eliminate the reference source, or when every vertex is exactly the suffix.
    final SeqVertex suffixTemplate = commonSuffix(toSplit);
    if (suffixTemplate.isEmpty()
        || wouldEliminateRefSource(graph, suffixTemplate, toSplit)
        || allVerticesAreTheCommonSuffix(suffixTemplate, toSplit)) {
      return false;
    }

    final List<BaseEdge> staleEdges = new LinkedList<BaseEdge>();

    for (final SeqVertex original : toSplit) {
      // every split vertex gets its own private copy of the suffix vertex
      final SeqVertex suffixCopy = new SeqVertex(suffixTemplate.getSequence());
      graph.addVertex(suffixCopy);

      final SeqVertex prefix = original.withoutSuffix(suffixCopy.getSequence());
      final BaseEdge outgoing = graph.outgoingEdgeOf(original);

      // vertex that inherits the edges that used to enter the original vertex
      final SeqVertex entryPoint;
      if (prefix != null) {
        entryPoint = prefix;
        graph.addVertex(prefix);
        graph.addEdge(prefix, suffixCopy, new BaseEdge(outgoing.isRef(), 1));
        staleEdges.add(outgoing);
      } else {
        // the original vertex is entirely explained by the suffix
        entryPoint = suffixCopy;
      }

      // the suffix copy takes over the original vertex's outgoing connection
      graph.addEdge(suffixCopy, graph.getEdgeTarget(outgoing), outgoing.copy());

      // re-route every incoming edge of the original vertex to the entry point
      for (final BaseEdge incoming : graph.incomingEdgesOf(original)) {
        graph.addEdge(graph.getEdgeSource(incoming), entryPoint, incoming.copy());
        staleEdges.add(incoming);
      }
    }

    // drop the vertices that were split, then the edges collected for removal
    graph.removeAllVertices(toSplit);
    graph.removeAllEdges(staleEdges);

    return true;
  }
  @Test(dataProvider = "CompleteCycleData")
  public void testSplitterCompleteCycle(
      final List<String> strings, final boolean hasTop, final boolean hasBot) {
    // Runs the full split + updateGraph cycle on a graph with one middle vertex per input
    // string (optionally fed by a top vertex and/or draining into a bottom vertex) and checks
    // that the haplotypes found afterwards are the same as those found beforehand.
    final SeqGraph graph = new SeqGraph(11);

    int nextWeight = 1;
    final SeqVertex top = hasTop ? new SeqVertex("AAAAAAAA") : null;
    final SeqVertex bot = hasBot ? new SeqVertex("GGGGGGGG") : null;

    final List<SeqVertex> middles = new ArrayList<>();
    for (final String bases : strings) {
      middles.add(new SeqVertex(bases));
    }
    graph.addVertices(middles.toArray(new SeqVertex[middles.size()]));
    final SeqVertex firstMiddle = middles.get(0);

    // Edges get strictly increasing weights; the boolean flag is set only for the first middle.
    if (hasTop) {
      graph.addVertex(top);
      for (final SeqVertex middle : middles) {
        final boolean isFirst = middle == firstMiddle;
        graph.addEdge(top, middle, new BaseEdge(isFirst, nextWeight++));
      }
    }

    if (hasBot) {
      graph.addVertex(bot);
      for (final SeqVertex middle : middles) {
        final boolean isFirst = middle == firstMiddle;
        graph.addEdge(middle, bot, new BaseEdge(isFirst, nextWeight++));
      }
    }

    // Record the haplotypes of a clone of the graph before any splitting happens.
    final Set<String> haplotypes = new HashSet<>();
    final KBestHaplotypeFinder originalPaths =
        new KBestHaplotypeFinder((SeqGraph) graph.clone(), graph.getSources(), graph.getSinks());
    for (final KBestHaplotype path : originalPaths) {
      haplotypes.add(new String(path.bases()));
    }

    final SharedVertexSequenceSplitter splitter =
        new SharedVertexSequenceSplitter(graph, middles);
    splitter.split();

    // Shared label for the optional .dot dumps and for the final assertion message.
    final String label = Utils.join("_", strings) + "_" + hasTop + "_" + hasBot;

    if (PRINT_GRAPHS) {
      graph.printGraph(new File(label + ".original.dot"), 0);
      splitter.splitGraph.printGraph(new File(label + ".split.dot"), 0);
    }
    splitter.updateGraph(top, bot);
    if (PRINT_GRAPHS) {
      graph.printGraph(new File(label + ".updated.dot"), 0);
    }

    // Every haplotype of the updated graph must have existed in the original graph.
    final KBestHaplotypeFinder splitPaths =
        new KBestHaplotypeFinder(graph, graph.getSources(), graph.getSinks());
    for (final KBestHaplotype path : splitPaths) {
      final String haplotype = new String(path.bases());
      Assert.assertTrue(haplotypes.contains(haplotype), "Failed to find haplotype " + haplotype);
    }

    // Compare the sorted unique haplotype bases before and after.
    final List<byte[]> sortedOriginalPaths = new ArrayList<>(originalPaths.size());
    for (final KBestHaplotype original : originalPaths.unique()) {
      sortedOriginalPaths.add(original.bases());
    }
    Collections.sort(sortedOriginalPaths, BaseUtils.BASES_COMPARATOR);

    final List<byte[]> sortedSplitPaths = new ArrayList<>(splitPaths.size());
    for (final KBestHaplotype updated : splitPaths.unique()) {
      sortedSplitPaths.add(updated.bases());
    }
    Collections.sort(sortedSplitPaths, BaseUtils.BASES_COMPARATOR);

    Assert.assertEquals(sortedSplitPaths, sortedOriginalPaths, label);
  }