예제 #1
0
  /**
   * Returns the difference between the local score of {@code y} under the first parent set and its
   * local score under the second parent set ({@code score(parents1) - score(parents2)}).
   *
   * <p>Nodes are translated to integer indices through {@code hashIndices}; the discrete or SEM
   * local score is chosen by {@code isDiscrete()}.
   *
   * @param y the node whose local score is evaluated
   * @param parents1 the first candidate parent set
   * @param parents2 the second candidate parent set
   * @return local score with {@code parents1} minus local score with {@code parents2}
   */
  private double scoreGraphChange(Node y, Set<Node> parents1, Set<Node> parents2) {
    final int target = hashIndices.get(y);

    // Score of y given the first parent set.
    final int[] firstIndices = new int[parents1.size()];
    int slot = 0;
    for (Node p : parents1) {
      firstIndices[slot] = hashIndices.get(p);
      slot++;
    }
    final double firstScore =
        isDiscrete()
            ? localDiscreteScore(target, firstIndices)
            : localSemScore(target, firstIndices);

    // Score of y given the second parent set.
    final int[] secondIndices = new int[parents2.size()];
    slot = 0;
    for (Node p : parents2) {
      secondIndices[slot] = hashIndices.get(p);
      slot++;
    }
    final double secondScore =
        isDiscrete()
            ? localDiscreteScore(target, secondIndices)
            : localSemScore(target, secondIndices);

    return firstScore - secondScore;
  }
  /**
   * Runs the vanishing test on the four members of {@code quartet}, taken in the set's iteration
   * order.
   *
   * @param quartet a set containing exactly four variable indices
   * @return the result of {@code testVanishing} on the four members
   * @throws IllegalArgumentException if the set does not contain exactly four elements
   */
  private boolean quartetVanishes(Set<Integer> quartet) {
    final int size = quartet.size();
    if (size != 4) {
      throw new IllegalArgumentException("Expecting a quartet, size = " + size);
    }

    // Pull the four members out in iteration order.
    final int[] members = new int[4];
    final Iterator<Integer> cursor = quartet.iterator();
    for (int k = 0; k < members.length; k++) {
      members[k] = cursor.next();
    }

    return testVanishing(members[0], members[1], members[2], members[3]);
  }
예제 #3
0
  // ===========================SCORING METHODS===================//
  /**
   * Scores a DAG as the sum of the local scores of each node given its parents.
   *
   * <p>The graph is copied into an {@code EdgeListGraphSingleConnections}, the node-to-index map is
   * rebuilt via {@code buildIndexing(graph)}, and each node contributes its discrete or SEM local
   * score depending on {@code isDiscrete()}.
   *
   * <p>Fix: the index of the node being scored is now looked up directly in {@code hashIndices}.
   * The previous code looped over all variables and overwrote the index on every pass, so every
   * node was scored as if it were the last variable, regardless of which node was being visited.
   *
   * @param graph the graph to score
   * @return the total score over all nodes
   * @throws NullPointerException if a node or parent has no entry in {@code hashIndices}
   */
  public double scoreDag(Graph graph) {
    Graph dag = new EdgeListGraphSingleConnections(graph);
    buildIndexing(graph);

    double score = 0.0;

    for (Node y : dag.getNodes()) {
      // Index of the node actually being scored (same lookup scoreGraphChange uses).
      int yIndex = hashIndices.get(y);

      Set<Node> parents = new HashSet<Node>(dag.getParents(y));
      int[] parentIndices = new int[parents.size()];
      int count = 0;
      for (Node parent : parents) {
        parentIndices[count++] = hashIndices.get(parent);
      }

      if (this.isDiscrete()) {
        score += localDiscreteScore(yIndex, parentIndices);
      } else {
        score += localSemScore(yIndex, parentIndices);
      }
    }
    return score;
  }
  /**
   * Checks whether every pair of variables in {@code cluster} is adjacent according to {@code
   * adjacencies}.
   *
   * @param cluster indices into {@code variables}
   * @param adjacencies for each node, the set of nodes it is adjacent to
   * @return {@code true} if each pair (in list order) has the second node in the first node's
   *     adjacency set; {@code false} as soon as one pair does not
   */
  private boolean clique(Set<Integer> cluster, Map<Node, Set<Node>> adjacencies) {
    final List<Integer> members = new ArrayList<Integer>(cluster);
    final int n = cluster.size();

    for (int a = 0; a < n; a++) {
      for (int b = a + 1; b < n; b++) {
        final Node first = variables.get(members.get(a));
        final Node second = variables.get(members.get(b));

        final boolean adjacent = adjacencies.get(first).contains(second);
        if (adjacent) {
          continue;
        }
        return false;
      }
    }

    return true;
  }
  /**
   * Transforms a maximally directed pattern (PDAG) represented in graph <code>g</code> into an
   * arbitrary DAG by modifying <code>g</code> itself. Based on the algorithm described in
   * Chickering (2002) "Optimal structure identification with greedy search" Journal of Machine
   * Learning Research. R. Silva, June 2004
   *
   * <p>Outline of what the code does: every tail-tail (undirected) edge is removed from
   * <code>g</code>; a working graph <code>p</code> built from <code>g</code> is then repeatedly
   * searched for a node with no children whose undirected neighbors together with its parents
   * form a clique. For such a node, each undirected neighbor is connected to it in <code>g</code>
   * by a directed edge neighbor --&gt; node, and the node is removed from <code>p</code>.
   *
   * @param g the graph to transform; modified in place
   */
  public static void pdagToDag(Graph g) {
    // Working graph built from g before g is modified (presumably a copy -- confirm against
    // EdgeListGraph's constructor).
    Graph p = new EdgeListGraph(g);
    List<Edge> undirectedEdges = new ArrayList<Edge>();

    // Collect every edge of g whose two endpoints are both TAIL, without duplicates.
    for (Edge edge : g.getEdges()) {
      if (edge.getEndpoint1() == Endpoint.TAIL
          && edge.getEndpoint2() == Endpoint.TAIL
          && !undirectedEdges.contains(edge)) {
        undirectedEdges.add(edge);
      }
    }
    // g now keeps only the remaining edges; orientations for the removed ones are added below.
    g.removeEdges(undirectedEdges);
    List<Node> pNodes = p.getNodes();

    // Each pass of the do-while removes exactly one entry from pNodes (or none if it is empty).
    do {
      Node x = null;

      for (Node pNode : pNodes) {
        x = pNode;

        // Candidate must have no children in the working graph.
        if (p.getChildren(x).size() > 0) {
          continue;
        }

        // Nodes joined to x in p by a tail-tail edge.
        Set<Node> neighbors = new HashSet<Node>();

        for (Edge edge : p.getEdges()) {
          if (edge.getNode1() == x || edge.getNode2() == x) {
            if (edge.getEndpoint1() == Endpoint.TAIL && edge.getEndpoint2() == Endpoint.TAIL) {
              if (edge.getNode1() == x) {
                neighbors.add(edge.getNode2());
              } else {
                neighbors.add(edge.getNode1());
              }
            }
          }
        }
        // When x has undirected neighbors, they and x's parents must form a clique in p;
        // otherwise x is skipped.
        if (neighbors.size() > 0) {
          Collection<Node> parents = p.getParents(x);
          Set<Node> all = new HashSet<Node>(neighbors);
          all.addAll(parents);
          if (!GraphUtils.isClique(all, p)) {
            continue;
          }
        }

        // Orient each undirected neighbor into x in g. Nodes are looked up in g by name rather
        // than reusing the objects taken from p.
        for (Node neighbor : neighbors) {
          Node node1 = g.getNode(neighbor.getName());
          Node node2 = g.getNode(x.getName());

          g.addDirectedEdge(node1, node2);
        }
        p.removeNode(x);
        break;
      }
      // x is the node selected above. If no node qualified, x is simply the last node examined:
      // it is dropped from pNodes (which guarantees termination) but was not removed from p.
      // NOTE(review): whether pNodes is a live view of p or an independent list is not visible
      // here -- confirm against Graph.getNodes().
      pNodes.remove(x);
    } while (pNodes.size() > 0);
  }
 /**
  * Builds combined clusters from the grown effect clusters and attaches leftover variables to
  * them.
  *
  * <p>The effect seeds are first grown via {@code finishESeeds(ESeeds)}. Each resulting effect
  * cluster becomes a two-element list: index 0 holds the set of extra variable indices attached
  * to the cluster (initially empty), index 1 holds the effect cluster itself. Every variable
  * index that is not in any effect cluster is then compared against each effect cluster by the
  * size of the intersection between the cluster and {@code CSeeds.get(c)}; when the acceptance
  * flag is set, the variable is added to the index-0 set of the best-overlapping cluster.
  *
  * <p>Fixes relative to the previous version:
  *
  * <ul>
  *   <li>The initial list was populated with {@code add(1, ...)} on an empty list, which throws
  *       {@code IndexOutOfBoundsException}; it now holds an empty set at index 0 and the effect
  *       cluster at index 1, matching how the list is read later.
  *   <li>The intersection was computed by calling {@code retainAll} on the effect cluster itself,
  *       destroying it; a copy is used now.
  *   <li>The overlap ratio used integer division; it is now computed in floating point.
  *   <li>Raw collection types were replaced with parameterized ones.
  * </ul>
  *
  * @param ESeeds effect seeds, passed to {@code finishESeeds}
  * @param CSeeds list indexed by variable index; element {@code c} is the set compared against
  *     each effect cluster for variable {@code c}
  * @return the set of {@code [attached indices, effect cluster]} lists
  */
 private Set<List<Set<Integer>>> combineClusters(
     Set<Set<Integer>> ESeeds, List<Set<Integer>> CSeeds) {
   Set<Set<Integer>> eClusters = finishESeeds(ESeeds);

   // Variable indices that are not a member of any effect cluster.
   Set<Integer> unassigned = new HashSet<Integer>();
   for (int i = 0; i < variables.size(); i++) {
     unassigned.add(i);
   }
   for (Set<Integer> eCluster : eClusters) {
     unassigned.removeAll(eCluster);
   }

   // Fixed ordering of the effect clusters so that positions in eClusterList and clusters agree.
   List<Set<Integer>> eClusterList = new ArrayList<Set<Integer>>(eClusters);

   List<List<Set<Integer>>> clusters = new ArrayList<List<Set<Integer>>>();
   for (Set<Integer> eCluster : eClusterList) {
     List<Set<Integer>> entry = new ArrayList<Set<Integer>>();
     entry.add(new HashSet<Integer>()); // index 0: attached variable indices
     entry.add(eCluster); // index 1: the effect cluster
     clusters.add(entry);
   }

   for (Integer c : unassigned) {
     int match = -1;
     int overlap = 0;
     boolean pass = false;

     for (int i = 0; i < eClusterList.size(); i++) {
       Set<Integer> eCluster = eClusterList.get(i);

       // Intersect on a copy so the effect cluster is left intact.
       Set<Integer> intersection = new HashSet<Integer>(eCluster);
       intersection.retainAll(CSeeds.get(c));
       int candidateOverlap = intersection.size();

       if (candidateOverlap > overlap) {
         overlap = candidateOverlap;
         match = i;
         // eCluster is non-empty here because candidateOverlap > 0.
         if ((double) overlap / eCluster.size() > CIparameter) {
           // NOTE(review): once set, pass stays true even if a later cluster with a larger
           // overlap but a ratio below the threshold becomes the match -- confirm intent.
           pass = true;
         }
       }
     }

     if (pass) {
       clusters.get(match).get(0).add(c);
     }
   }

   return new HashSet<List<Set<Integer>>>(clusters);
 }
  /**
   * Packs three distinct indices into a set.
   *
   * @param n1 first index
   * @param n2 second index
   * @param n3 third index
   * @return a new set containing the three indices
   * @throws IllegalArgumentException if any two of the indices are equal
   */
  private Set<Integer> triple(int n1, int n2, int n3) {
    final Set<Integer> members = new HashSet<Integer>();
    for (int n : new int[] {n1, n2, n3}) {
      members.add(n);
    }

    // A duplicate collapses in the set, leaving fewer than three members.
    if (members.size() >= 3) {
      return members;
    }

    final String listing = n1 + ", " + n2 + ", " + n3;
    throw new IllegalArgumentException("Triple elements must be unique: <" + listing + ">");
  }
  /**
   * Packs four distinct indices into a set.
   *
   * @param x first index
   * @param y second index
   * @param z third index
   * @param w fourth index
   * @return a new set containing the four indices
   * @throws IllegalArgumentException if any two of the indices are equal
   */
  private Set<Integer> quartet(int x, int y, int z, int w) {
    final Set<Integer> members = new HashSet<Integer>();
    for (int n : new int[] {x, y, z, w}) {
      members.add(n);
    }

    // A duplicate collapses in the set, leaving fewer than four members.
    if (members.size() >= 4) {
      return members;
    }

    final String listing = x + ", " + y + ", " + z + ", " + w;
    throw new IllegalArgumentException("Quartet elements must be unique: <" + listing + ">");
  }
  /**
   * Grows each effect seed into a larger cluster of variable indices and then selects a set of
   * pairwise-disjoint grown clusters, considering larger clusters first.
   *
   * <p>The grow strategy is selected by {@code algType}. In every strategy a candidate variable
   * {@code o} is judged by the three-element sets formed from {@code o} and each pair of current
   * cluster members, looked up in {@code ESeeds} (so seeds are presumably triples -- confirm
   * against the caller):
   *
   * <ul>
   *   <li>{@code lax} and {@code laxWithSpeedup}: {@code o} is skipped only when more of those
   *       sets are missing from {@code ESeeds} than present.
   *   <li>{@code strict}: {@code o} is skipped as soon as one of those sets is missing.
   * </ul>
   *
   * <p>Side effect: seeds covered by a grown cluster are removed from {@code ESeeds}, so the
   * caller's set is modified.
   *
   * @param ESeeds the effect seeds to grow; modified in place
   * @return grown clusters such that no variable index appears in more than one of them
   */
  private Set<Set<Integer>> finishESeeds(Set<Set<Integer>> ESeeds) {
    log("Growing Effect Seeds.", true);
    Set<Set<Integer>> grown = new HashSet<Set<Integer>>();

    // Candidate variable indices 0..variables.size()-1, optionally shuffled before each grow.
    List<Integer> _variables = new ArrayList<Integer>();
    for (int i = 0; i < variables.size(); i++) _variables.add(i);

    // Lax grow phase with speedup.
    // NOTE(review): this comment says "with speedup" but the branch is AlgType.lax, while the
    // branch below is AlgType.laxWithSpeedup and is commented "without speedup" -- the two
    // labels look swapped; confirm which is intended.
    if (algType == AlgType.lax) {
      // Scratch set reused for every lookup/removal to avoid allocating a new set each time.
      Set<Integer> t = new HashSet<Integer>();
      int count = 0;
      int total = ESeeds.size();

      // Each pass grows the next remaining seed and then removes the seeds it covers, so the
      // loop ends once ESeeds has been emptied.
      do {
        if (!ESeeds.iterator().hasNext()) {
          break;
        }

        Set<Integer> cluster = ESeeds.iterator().next();
        Set<Integer> _cluster = new HashSet<Integer>(cluster);

        if (extraShuffle) {
          Collections.shuffle(_variables);
        }

        for (int o : _variables) {
          if (_cluster.contains(o)) continue;

          // Snapshot of the current members so pairs can be addressed by position.
          List<Integer> _cluster2 = new ArrayList<Integer>(_cluster);
          int rejected = 0;
          int accepted = 0;

          ChoiceGenerator gen = new ChoiceGenerator(_cluster2.size(), 2);
          int[] choice;

          // Count how many {n1, n2, o} sets are present in / absent from ESeeds.
          while ((choice = gen.next()) != null) {
            int n1 = _cluster2.get(choice[0]);
            int n2 = _cluster2.get(choice[1]);

            t.clear();
            t.add(n1);
            t.add(n2);
            t.add(o);

            if (!ESeeds.contains(t)) {
              rejected++;
            } else {
              accepted++;
            }
          }

          // Majority vote: ties (including zero votes on both sides) still add o.
          if (rejected > accepted) {
            continue;
          }

          _cluster.add(o);

          //                    if (!(avgSumLnP(new ArrayList<Integer>(_cluster)) > -10)) {
          //                        _cluster.remove(o);
          //                    }
        }

        // This takes out all pure clusters that are subsets of _cluster.
        // (Implemented by removing every three-element subset of _cluster from ESeeds.)
        ChoiceGenerator gen2 = new ChoiceGenerator(_cluster.size(), 3);
        int[] choice2;
        List<Integer> _cluster3 = new ArrayList<Integer>(_cluster);

        while ((choice2 = gen2.next()) != null) {
          int n1 = _cluster3.get(choice2[0]);
          int n2 = _cluster3.get(choice2[1]);
          int n3 = _cluster3.get(choice2[2]);

          t.clear();
          t.add(n1);
          t.add(n2);
          t.add(n3);

          ESeeds.remove(t);
        }

        if (verbose) {
          System.out.println(
              "Grown "
                  + (++count)
                  + " of "
                  + total
                  + ": "
                  + variablesForIndices(new ArrayList<Integer>(_cluster)));
        }
        grown.add(_cluster);
      } while (!ESeeds.isEmpty());
    }

    // Lax grow phase without speedup.
    // NOTE(review): see the note on the AlgType.lax branch above about the swapped labels.
    if (algType == AlgType.laxWithSpeedup) {
      int count = 0;
      int total = ESeeds.size();

      // Optimized lax version of grow phase.
      // Iterates over a snapshot of ESeeds, so seeds removed below during an earlier pass are
      // still visited here.
      for (Set<Integer> cluster : new HashSet<Set<Integer>>(ESeeds)) {
        Set<Integer> _cluster = new HashSet<Integer>(cluster);

        if (extraShuffle) {
          Collections.shuffle(_variables);
        }

        for (int o : _variables) {
          if (_cluster.contains(o)) continue;

          List<Integer> _cluster2 = new ArrayList<Integer>(_cluster);
          int rejected = 0;
          int accepted = 0;
          //
          ChoiceGenerator gen = new ChoiceGenerator(_cluster2.size(), 2);
          int[] choice;

          while ((choice = gen.next()) != null) {
            int n1 = _cluster2.get(choice[0]);
            int n2 = _cluster2.get(choice[1]);

            // Unlike the other branches, a fresh set is built through triple(...) per lookup.
            Set<Integer> triple = triple(n1, n2, o);

            if (!ESeeds.contains(triple)) {
              rejected++;
            } else {
              accepted++;
            }
          }
          //
          if (rejected > accepted) {
            continue;
          }

          //                    System.out.println("Adding " + o  + " to " + cluster);
          _cluster.add(o);
        }

        // Drop every remaining seed that is fully contained in the grown cluster.
        for (Set<Integer> c : new HashSet<Set<Integer>>(ESeeds)) {
          if (_cluster.containsAll(c)) {
            ESeeds.remove(c);
          }
        }

        if (verbose) {
          System.out.println("Grown " + (++count) + " of " + total + ": " + _cluster);
        }

        grown.add(_cluster);
      }
    }

    // Strict grow phase.
    if (algType == AlgType.strict) {
      // Scratch set reused for every lookup/removal.
      Set<Integer> t = new HashSet<Integer>();
      int count = 0;
      int total = ESeeds.size();

      do {
        if (!ESeeds.iterator().hasNext()) {
          break;
        }

        Set<Integer> cluster = ESeeds.iterator().next();
        Set<Integer> _cluster = new HashSet<Integer>(cluster);

        if (extraShuffle) {
          Collections.shuffle(_variables);
        }

        VARIABLES:
        for (int o : _variables) {
          if (_cluster.contains(o)) continue;

          List<Integer> _cluster2 = new ArrayList<Integer>(_cluster);

          ChoiceGenerator gen = new ChoiceGenerator(_cluster2.size(), 2);
          int[] choice;

          while ((choice = gen.next()) != null) {
            int n1 = _cluster2.get(choice[0]);
            int n2 = _cluster2.get(choice[1]);

            t.clear();
            t.add(n1);
            t.add(n2);
            t.add(o);

            // A single missing set is enough to reject o and move to the next candidate.
            if (!ESeeds.contains(t)) {
              continue VARIABLES;
            }

            //                        if (avgSumLnP(new ArrayList<Integer>(t)) < -10) continue
            // CLUSTER;
          }

          _cluster.add(o);
        }

        // This takes out all pure clusters that are subsets of _cluster.
        // (Implemented by removing every three-element subset of _cluster from ESeeds.)
        ChoiceGenerator gen2 = new ChoiceGenerator(_cluster.size(), 3);
        int[] choice2;
        List<Integer> _cluster3 = new ArrayList<Integer>(_cluster);

        while ((choice2 = gen2.next()) != null) {
          int n1 = _cluster3.get(choice2[0]);
          int n2 = _cluster3.get(choice2[1]);
          int n3 = _cluster3.get(choice2[2]);

          t.clear();
          t.add(n1);
          t.add(n2);
          t.add(n3);

          ESeeds.remove(t);
        }

        if (verbose) {
          System.out.println("Grown " + (++count) + " of " + total + ": " + _cluster);
        }
        grown.add(_cluster);
      } while (!ESeeds.isEmpty());
    }

    // Optimized pick phase.
    log("Choosing among grown Effect Clusters.", true);

    // Logging only: report each grown cluster with its indices sorted.
    for (Set<Integer> l : grown) {
      ArrayList<Integer> _l = new ArrayList<Integer>(l);
      Collections.sort(_l);
      if (verbose) {
        log("Grown: " + variablesForIndices(_l), false);
      }
    }

    Set<Set<Integer>> out = new HashSet<Set<Integer>>();

    List<Set<Integer>> list = new ArrayList<Set<Integer>>(grown);

    //        final Map<Set<Integer>, Double> pValues = new HashMap<Set<Integer>, Double>();
    //
    //        for (Set<Integer> o : grown) {
    //            pValues.put(o, getP(new ArrayList<Integer>(o)));
    //        }

    // Order the grown clusters from largest to smallest.
    Collections.sort(
        list,
        new Comparator<Set<Integer>>() {
          @Override
          public int compare(Set<Integer> o1, Set<Integer> o2) {
            //                if (o1.size() == o2.size()) {
            //                    double chisq1 = pValues.get(o1);
            //                    double chisq2 = pValues.get(o2);
            //                    return Double.compare(chisq2, chisq1);
            //                }

            return o2.size() - o1.size();
          }
        });

    //        for (Set<Integer> o : list) {
    //            if (pValues.get(o) < alpha) continue;
    //            System.out.println(variablesForIndices(new ArrayList<Integer>(o)) + "  p = " +
    // pValues.get(o));
    //        }

    // Indices already claimed by an accepted cluster.
    Set<Integer> all = new HashSet<Integer>();

    // Greedy selection: accept a cluster only if it shares no index with those accepted so far.
    CLUSTER:
    for (Set<Integer> cluster : list) {
      //            if (pValues.get(cluster) < alpha) continue;

      for (Integer i : cluster) {
        if (all.contains(i)) continue CLUSTER;
      }

      out.add(cluster);

      //            if (getPMulticluster(out) < alpha) {
      //                out.remove(cluster);
      //                continue;
      //            }

      all.addAll(cluster);
    }

    return out;
  }