Exemplo n.º 1
0
  /**
   * Splits the suffix shared by all vertices feeding into {@code v} out of those vertices,
   * rewiring the graph in place.
   *
   * <p>Each incoming vertex of {@code v} is replaced by an optional prefix vertex followed by its
   * own copy of the common suffix vertex. The graph is left untouched and {@code false} is returned
   * when {@code v} has fewer than two incoming vertices, or when any of {@code safeToSplit}, an
   * empty common suffix, {@code wouldEliminateRefSource} or {@code allVerticesAreTheCommonSuffix}
   * rules the split out.
   *
   * @param graph the graph to modify; it must contain {@code v}
   * @param v the bottom node whose incoming vertices we'd like to split
   * @return true if some useful splitting was done, false otherwise
   * @throws IllegalArgumentException if {@code graph} or {@code v} is null, or if {@code graph}
   *     does not contain {@code v}
   */
  public boolean split(final SeqGraph graph, final SeqVertex v) {
    if (graph == null) {
      throw new IllegalArgumentException("graph cannot be null");
    }
    if (v == null) {
      throw new IllegalArgumentException("v cannot be null");
    }
    if (!graph.vertexSet().contains(v)) {
      throw new IllegalArgumentException("graph doesn't contain vertex v " + v);
    }

    final Collection<SeqVertex> parents = graph.incomingVerticesOf(v);

    // A suffix can only be shared between at least two vertices.
    if (parents.size() < 2 || !safeToSplit(graph, v, parents)) {
      return false;
    }

    final SeqVertex sharedSuffix = commonSuffix(parents);
    if (sharedSuffix.isEmpty()
        || wouldEliminateRefSource(graph, sharedSuffix, parents)
        || allVerticesAreTheCommonSuffix(sharedSuffix, parents)) {
      return false;
    }

    final List<BaseEdge> staleEdges = new LinkedList<BaseEdge>();

    for (final SeqVertex parent : parents) {
      // Every parent gets its own private copy of the suffix vertex.
      final SeqVertex suffixCopy = new SeqVertex(sharedSuffix.getSequence());
      graph.addVertex(suffixCopy);
      final SeqVertex prefix = parent.withoutSuffix(suffixCopy.getSequence());
      final BaseEdge outgoing = graph.outgoingEdgeOf(parent);

      if (prefix != null) {
        // Something is left of the parent once the suffix is stripped: chain prefix -> suffix.
        graph.addVertex(prefix);
        graph.addEdge(prefix, suffixCopy, new BaseEdge(outgoing.isRef(), 1));
        staleEdges.add(outgoing);
      }

      // With no prefix the parent is entirely explained by the suffix, so the edges that used
      // to enter the parent must enter the suffix copy directly.
      final SeqVertex incomingTarget = (prefix != null) ? prefix : suffixCopy;

      // The suffix copy takes over the parent's outgoing edge.
      graph.addEdge(suffixCopy, graph.getEdgeTarget(outgoing), outgoing.copy());

      for (final BaseEdge incoming : graph.incomingEdgesOf(parent)) {
        graph.addEdge(graph.getEdgeSource(incoming), incomingTarget, incoming.copy());
        staleEdges.add(incoming);
      }
    }

    // The original vertices and the edges recorded above are now superseded.
    graph.removeAllVertices(parents);
    graph.removeAllEdges(staleEdges);

    return true;
  }
  /**
   * Checks the splitter's minimum-mergable-sequence predicates against the expected answers.
   *
   * <p>Builds a graph in which a fixed top vertex points at every middle vertex and every middle
   * vertex points at a fixed bottom vertex, then queries a {@code SharedVertexSequenceSplitter}
   * created over the middle vertices.
   *
   * @param mids the sequences of the middle vertices
   * @param minSeqLength the minimum length passed to each predicate
   * @param prefixMeets expected result of the prefix predicate
   * @param suffixMeets expected result of the suffix predicate
   */
  @Test(dataProvider = "MeetsMinSequenceData")
  public void testSplitterCompleteCycle(
      final List<String> mids,
      final int minSeqLength,
      final boolean prefixMeets,
      final boolean suffixMeets) {
    final SeqGraph graph = new SeqGraph(11);

    final SeqVertex top = new SeqVertex("AAAAAAAA");
    final SeqVertex bot = new SeqVertex("GGGGGGGG");

    final List<SeqVertex> middles = new ArrayList<>();
    for (final String bases : mids) {
      middles.add(new SeqVertex(bases));
    }

    graph.addVertices(middles.toArray(new SeqVertex[middles.size()]));
    graph.addVertices(top, bot);

    // Wire up top -> middle -> bot for every middle vertex.
    for (final SeqVertex middle : middles) {
      graph.addEdge(top, middle);
      graph.addEdge(middle, bot);
    }

    final SharedVertexSequenceSplitter splitter =
        new SharedVertexSequenceSplitter(graph, middles);

    final boolean prefixResult = splitter.meetsMinMergableSequenceForPrefix(minSeqLength);
    Assert.assertEquals(prefixResult, prefixMeets, "Prefix failed");

    final boolean suffixResult = splitter.meetsMinMergableSequenceForSuffix(minSeqLength);
    Assert.assertEquals(suffixResult, suffixMeets, "Suffix failed");

    final boolean eitherResult =
        splitter.meetsMinMergableSequenceForEitherPrefixOrSuffix(minSeqLength);
    Assert.assertEquals(eitherResult, suffixMeets || prefixMeets, "Either prefix or suffix failed");
  }
  /**
   * Runs a full split-and-update cycle and checks that the haplotypes found in the graph are the
   * same before and after.
   *
   * <p>The graph holds one vertex per input string, optionally fanned out from a top vertex and
   * optionally fanned in to a bottom vertex. Each edge gets a distinct, increasing weight, and the
   * boolean flag passed to {@code BaseEdge} is true only for edges touching the first vertex.
   *
   * @param strings the sequences of the vertices handed to the splitter; must not be empty
   * @param hasTop whether to add a shared top vertex with edges to every vertex
   * @param hasBot whether to add a shared bottom vertex with edges from every vertex
   */
  @Test(dataProvider = "CompleteCycleData")
  public void testSplitterCompleteCycle(
      final List<String> strings, final boolean hasTop, final boolean hasBot) {
    final SeqGraph graph = new SeqGraph(11);

    int nextWeight = 1;
    final SeqVertex top = hasTop ? new SeqVertex("AAAAAAAA") : null;
    final SeqVertex bot = hasBot ? new SeqVertex("GGGGGGGG") : null;

    final List<SeqVertex> vertices = new ArrayList<>();
    for (final String bases : strings) {
      vertices.add(new SeqVertex(bases));
    }
    graph.addVertices(vertices.toArray(new SeqVertex[vertices.size()]));
    final SeqVertex first = vertices.get(0);

    if (hasTop) {
      graph.addVertex(top);
      for (final SeqVertex vertex : vertices) {
        graph.addEdge(top, vertex, new BaseEdge(vertex == first, nextWeight++));
      }
    }

    if (hasBot) {
      graph.addVertex(bot);
      for (final SeqVertex vertex : vertices) {
        graph.addEdge(vertex, bot, new BaseEdge(vertex == first, nextWeight++));
      }
    }

    // Record the haplotypes of a copy of the untouched graph as the reference answer.
    final Set<String> haplotypes = new HashSet<>();
    final KBestHaplotypeFinder originalPaths =
        new KBestHaplotypeFinder((SeqGraph) graph.clone(), graph.getSources(), graph.getSinks());
    for (final KBestHaplotype path : originalPaths) {
      haplotypes.add(new String(path.bases()));
    }

    final SharedVertexSequenceSplitter splitter = new SharedVertexSequenceSplitter(graph, vertices);
    splitter.split();

    // Shared by the debug file names and the final assertion message.
    final String label = Utils.join("_", strings) + "_" + hasTop + "_" + hasBot;

    if (PRINT_GRAPHS) {
      graph.printGraph(new File(label + ".original.dot"), 0);
      splitter.splitGraph.printGraph(new File(label + ".split.dot"), 0);
    }
    splitter.updateGraph(top, bot);
    if (PRINT_GRAPHS) {
      graph.printGraph(new File(label + ".updated.dot"), 0);
    }

    // Every haplotype in the updated graph must already have existed in the original one.
    final KBestHaplotypeFinder splitPaths =
        new KBestHaplotypeFinder(graph, graph.getSources(), graph.getSinks());
    for (final KBestHaplotype path : splitPaths) {
      final String haplotype = new String(path.bases());
      Assert.assertTrue(haplotypes.contains(haplotype), "Failed to find haplotype " + haplotype);
    }

    // Compare the unique haplotypes of both finders, sorted so that order does not matter.
    final List<byte[]> expectedBases = new ArrayList<>(originalPaths.size());
    for (final KBestHaplotype path : originalPaths.unique()) {
      expectedBases.add(path.bases());
    }
    Collections.sort(expectedBases, BaseUtils.BASES_COMPARATOR);

    final List<byte[]> actualBases = new ArrayList<>(splitPaths.size());
    for (final KBestHaplotype path : splitPaths.unique()) {
      actualBases.add(path.bases());
    }
    Collections.sort(actualBases, BaseUtils.BASES_COMPARATOR);

    Assert.assertEquals(actualBases, expectedBases, label);
  }