/**
 * Returns the local score of {@code y} under {@code parents1} minus its local score under
 * {@code parents2}.
 *
 * <p>Nodes are translated to integer indices through {@code hashIndices}. Each local score comes
 * from {@code localDiscreteScore} when {@code isDiscrete()} is true and from
 * {@code localSemScore} otherwise.
 *
 * @param y the node being scored
 * @param parents1 the first candidate parent set
 * @param parents2 the second candidate parent set
 * @return score(y, parents1) - score(y, parents2)
 */
private double scoreGraphChange(Node y, Set<Node> parents1, Set<Node> parents2) {
  final int target = hashIndices.get(y);

  // First parent set: translate to indices, then score.
  final int[] firstIndices = new int[parents1.size()];
  int slot = 0;
  for (Node member : parents1) {
    firstIndices[slot] = hashIndices.get(member);
    slot++;
  }
  final double firstScore =
      isDiscrete()
          ? localDiscreteScore(target, firstIndices)
          : localSemScore(target, firstIndices);

  // Second parent set: same procedure.
  final int[] secondIndices = new int[parents2.size()];
  slot = 0;
  for (Node member : parents2) {
    secondIndices[slot] = hashIndices.get(member);
    slot++;
  }
  final double secondScore =
      isDiscrete()
          ? localDiscreteScore(target, secondIndices)
          : localSemScore(target, secondIndices);

  return firstScore - secondScore;
}
/**
 * Runs {@code testVanishing} on the four members of {@code quartet}, taken in the set's
 * iteration order.
 *
 * @param quartet a set of exactly four variable indices
 * @return the result of {@code testVanishing} for the four indices
 * @throws IllegalArgumentException if the set does not contain exactly four elements
 */
private boolean quartetVanishes(Set<Integer> quartet) {
  final int size = quartet.size();
  if (size != 4) {
    throw new IllegalArgumentException("Expecting a quartet, size = " + size);
  }

  // Unpack the four members in iteration order.
  final int[] members = new int[4];
  int position = 0;
  for (Integer member : quartet) {
    members[position++] = member;
  }

  return testVanishing(members[0], members[1], members[2], members[3]);
}
// ===========================SCORING METHODS===================// public double scoreDag(Graph graph) { Graph dag = new EdgeListGraphSingleConnections(graph); buildIndexing(graph); double score = 0.0; for (Node y : dag.getNodes()) { Set<Node> parents = new HashSet<Node>(dag.getParents(y)); int nextIndex = -1; for (int i = 0; i < getVariables().size(); i++) { nextIndex = hashIndices.get(variables.get(i)); } int parentIndices[] = new int[parents.size()]; Iterator<Node> pi = parents.iterator(); int count = 0; while (pi.hasNext()) { Node nextParent = pi.next(); parentIndices[count++] = hashIndices.get(nextParent); } if (this.isDiscrete()) { score += localDiscreteScore(nextIndex, parentIndices); } else { score += localSemScore(nextIndex, parentIndices); } } return score; }
/**
 * Checks whether every pair of variables in {@code cluster} is adjacent according to
 * {@code adjacencies}.
 *
 * @param cluster indices into {@code variables}
 * @param adjacencies for each node, the set of nodes it is adjacent to
 * @return {@code true} if every pair is adjacent (trivially true for fewer than two members),
 *     {@code false} as soon as a non-adjacent pair is found
 */
private boolean clique(Set<Integer> cluster, Map<Node, Set<Node>> adjacencies) {
  final List<Integer> members = new ArrayList<Integer>(cluster);
  final int n = members.size();

  for (int first = 0; first < n - 1; first++) {
    for (int second = first + 1; second < n; second++) {
      // Nodes are resolved per pair, so a cluster with a single member triggers no lookups.
      final Node a = variables.get(members.get(first));
      final Node b = variables.get(members.get(second));

      final boolean adjacent = adjacencies.get(a).contains(b);
      if (!adjacent) {
        return false;
      }
    }
  }

  return true;
}
/**
 * Transforms a maximally directed pattern (PDAG) represented in graph <code>g</code> into an
 * arbitrary DAG by modifying <code>g</code> itself. Based on the algorithm described in
 * Chickering (2002) "Optimal structure identification with greedy search" Journal of Machine
 * Learning Research. R. Silva, June 2004
 *
 * <p>Outline of what the code does: all tail-tail (undirected) edges are first removed from
 * <code>g</code>. A working graph <code>p</code>, built from <code>g</code> before that removal,
 * is then consumed one node at a time. In each pass the first node of <code>p</code> that has no
 * children, and whose undirected neighbors together with its parents pass
 * <code>GraphUtils.isClique</code>, is selected; for each of its undirected neighbors a directed
 * edge is added to <code>g</code>, and the node is removed from <code>p</code>.
 *
 * @param g the graph to orient; it is modified in place
 */
public static void pdagToDag(Graph g) {
  // Working graph built from g before g is modified (presumably a copy — confirm against
  // EdgeListGraph's constructor).
  Graph p = new EdgeListGraph(g);
  List<Edge> undirectedEdges = new ArrayList<Edge>();

  // Collect every tail-tail edge of g once.
  for (Edge edge : g.getEdges()) {
    if (edge.getEndpoint1() == Endpoint.TAIL
        && edge.getEndpoint2() == Endpoint.TAIL
        && !undirectedEdges.contains(edge)) {
      undirectedEdges.add(edge);
    }
  }

  // From here on g holds only the edges that were not tail-tail; orientations for the removed
  // edges are added back below.
  g.removeEdges(undirectedEdges);
  List<Node> pNodes = p.getNodes();

  do {
    Node x = null;

    for (Node pNode : pNodes) {
      x = pNode;

      // Only a node without children in p can be selected.
      if (p.getChildren(x).size() > 0) {
        continue;
      }

      // Gather the nodes joined to x by a tail-tail edge in p. Nodes are compared by identity.
      Set<Node> neighbors = new HashSet<Node>();

      for (Edge edge : p.getEdges()) {
        if (edge.getNode1() == x || edge.getNode2() == x) {
          if (edge.getEndpoint1() == Endpoint.TAIL && edge.getEndpoint2() == Endpoint.TAIL) {
            if (edge.getNode1() == x) {
              neighbors.add(edge.getNode2());
            } else {
              neighbors.add(edge.getNode1());
            }
          }
        }
      }

      // If x has undirected neighbors, they and x's parents must pass GraphUtils.isClique in p;
      // otherwise x is skipped in this pass.
      if (neighbors.size() > 0) {
        Collection<Node> parents = p.getParents(x);
        Set<Node> all = new HashSet<Node>(neighbors);
        all.addAll(parents);
        if (!GraphUtils.isClique(all, p)) {
          continue;
        }
      }

      // Add one directed edge to g per undirected neighbor (presumably neighbor -> x; confirm
      // the argument order of addDirectedEdge). Nodes are looked up in g by name.
      for (Node neighbor : neighbors) {
        Node node1 = g.getNode(neighbor.getName());
        Node node2 = g.getNode(x.getName());

        g.addDirectedEdge(node1, node2);
      }

      // x is done: drop it from the working graph and start a new pass. The break comes directly
      // after the removal, so the iteration over pNodes does not continue past this change.
      p.removeNode(x);
      break;
    }

    // x is the node selected above or, when no node qualified in this pass, the last node that
    // was iterated (null only when pNodes was empty). It is removed from the list either way.
    // NOTE(review): in the no-node-qualified case x is dropped from pNodes without being removed
    // from p — confirm this is the intended fallback for inputs that are not extendable.
    pNodes.remove(x);
  } while (pNodes.size() > 0);
}
/**
 * Grows the effect seeds into effect clusters (via {@code finishESeeds}) and then tries to
 * attach each remaining variable to the effect cluster it overlaps most.
 *
 * <p>Every variable index that is not a member of any effect cluster is a candidate. For a
 * candidate {@code c}, the overlap with an effect cluster is the size of the intersection of
 * that cluster with {@code CSeeds.get(c)}. The candidate is attached to the cluster with the
 * largest non-zero overlap (the first such cluster on ties), provided the fraction
 * {@code overlap / clusterSize} for that cluster exceeds {@code CIparameter}.
 *
 * <p>Fixes relative to the previous implementation:
 *
 * <ul>
 *   <li>Each result entry is now created as {@code [empty C set, effect cluster]}; the old
 *       {@code add(1, ...)} on an empty list threw {@code IndexOutOfBoundsException}.
 *   <li>The intersection is computed on a copy, so effect clusters are no longer emptied by
 *       {@code retainAll}.
 *   <li>The overlap fraction uses floating-point division instead of integer division.
 *   <li>{@code pass} always describes the current best match instead of staying {@code true}
 *       from an earlier, weaker match.
 * </ul>
 *
 * @param ESeeds effect seeds; passed to {@code finishESeeds}, which modifies this set
 * @param CSeeds seed sets indexed by variable index (NOTE(review): assumed to contain an entry
 *     for every candidate index — confirm against the caller)
 * @return a set of two-element lists: element 0 is the set of attached candidate indices,
 *     element 1 is the effect cluster
 */
private Set<List<Set<Integer>>> combineClusters(
    Set<Set<Integer>> ESeeds, List<Set<Integer>> CSeeds) {
  Set<Set<Integer>> EClusters = finishESeeds(ESeeds);

  // Candidates: all variable indices minus those already covered by an effect cluster.
  Set<Integer> Cs = new HashSet<Integer>();
  for (int i = 0; i < variables.size(); i++) {
    Cs.add(i);
  }
  Set<Integer> Es = new HashSet<Integer>();
  for (Set<Integer> ECluster : EClusters) {
    Es.addAll(ECluster);
  }
  Cs.removeAll(Es);

  // Fix an order for the effect clusters so that positions in Clusters line up with them.
  List<Set<Integer>> EClustersArray = new ArrayList<Set<Integer>>(EClusters);

  List<List<Set<Integer>>> Clusters = new ArrayList<List<Set<Integer>>>();
  for (Set<Integer> ECluster : EClustersArray) {
    List<Set<Integer>> newCluster = new ArrayList<Set<Integer>>();
    newCluster.add(new HashSet<Integer>()); // slot 0: attached candidates, initially none
    newCluster.add(ECluster); // slot 1: the effect cluster itself
    Clusters.add(newCluster);
  }

  for (Integer c : Cs) {
    int match = -1;
    int overlap = 0;
    boolean pass = false;

    for (int i = 0; i < EClustersArray.size(); i++) {
      Set<Integer> ECluster = EClustersArray.get(i);

      // Intersect on a copy; retainAll modifies its receiver.
      Set<Integer> intersection = new HashSet<Integer>(ECluster);
      intersection.retainAll(CSeeds.get(c));
      int currentOverlap = intersection.size();

      if (currentOverlap > overlap) {
        overlap = currentOverlap;
        match = i;
        // currentOverlap > 0 implies the cluster is non-empty, so the division is safe.
        pass = (double) overlap / ECluster.size() > CIparameter;
      }
    }

    if (pass) {
      Clusters.get(match).get(0).add(c);
    }
  }

  return new HashSet<List<Set<Integer>>>(Clusters);
}
/**
 * Builds the three-element set {@code {n1, n2, n3}}.
 *
 * @param n1 first element
 * @param n2 second element
 * @param n3 third element
 * @return a new mutable set holding the three values
 * @throws IllegalArgumentException if any two of the values are equal
 */
private Set<Integer> triple(int n1, int n2, int n3) {
  final int[] elements = {n1, n2, n3};

  final Set<Integer> result = new HashSet<Integer>();
  for (int element : elements) {
    result.add(element);
  }

  // Duplicates collapse inside the set, so a full-size set means all values were distinct.
  if (result.size() >= 3) {
    return result;
  }

  throw new IllegalArgumentException(
      "Triple elements must be unique: <" + n1 + ", " + n2 + ", " + n3 + ">");
}
/**
 * Builds the four-element set {@code {x, y, z, w}}.
 *
 * @param x first element
 * @param y second element
 * @param z third element
 * @param w fourth element
 * @return a new mutable set holding the four values
 * @throws IllegalArgumentException if any two of the values are equal
 */
private Set<Integer> quartet(int x, int y, int z, int w) {
  final int[] elements = {x, y, z, w};

  final Set<Integer> result = new HashSet<Integer>();
  for (int element : elements) {
    result.add(element);
  }

  // Duplicates collapse inside the set, so a full-size set means all values were distinct.
  if (result.size() >= 4) {
    return result;
  }

  throw new IllegalArgumentException(
      "Quartet elements must be unique: <" + x + ", " + y + ", " + z + ", " + w + ">");
}
private Set<Set<Integer>> finishESeeds(Set<Set<Integer>> ESeeds) { log("Growing Effect Seeds.", true); Set<Set<Integer>> grown = new HashSet<Set<Integer>>(); List<Integer> _variables = new ArrayList<Integer>(); for (int i = 0; i < variables.size(); i++) _variables.add(i); // Lax grow phase with speedup. if (algType == AlgType.lax) { Set<Integer> t = new HashSet<Integer>(); int count = 0; int total = ESeeds.size(); do { if (!ESeeds.iterator().hasNext()) { break; } Set<Integer> cluster = ESeeds.iterator().next(); Set<Integer> _cluster = new HashSet<Integer>(cluster); if (extraShuffle) { Collections.shuffle(_variables); } for (int o : _variables) { if (_cluster.contains(o)) continue; List<Integer> _cluster2 = new ArrayList<Integer>(_cluster); int rejected = 0; int accepted = 0; ChoiceGenerator gen = new ChoiceGenerator(_cluster2.size(), 2); int[] choice; while ((choice = gen.next()) != null) { int n1 = _cluster2.get(choice[0]); int n2 = _cluster2.get(choice[1]); t.clear(); t.add(n1); t.add(n2); t.add(o); if (!ESeeds.contains(t)) { rejected++; } else { accepted++; } } if (rejected > accepted) { continue; } _cluster.add(o); // if (!(avgSumLnP(new ArrayList<Integer>(_cluster)) > -10)) { // _cluster.remove(o); // } } // This takes out all pure clusters that are subsets of _cluster. ChoiceGenerator gen2 = new ChoiceGenerator(_cluster.size(), 3); int[] choice2; List<Integer> _cluster3 = new ArrayList<Integer>(_cluster); while ((choice2 = gen2.next()) != null) { int n1 = _cluster3.get(choice2[0]); int n2 = _cluster3.get(choice2[1]); int n3 = _cluster3.get(choice2[2]); t.clear(); t.add(n1); t.add(n2); t.add(n3); ESeeds.remove(t); } if (verbose) { System.out.println( "Grown " + (++count) + " of " + total + ": " + variablesForIndices(new ArrayList<Integer>(_cluster))); } grown.add(_cluster); } while (!ESeeds.isEmpty()); } // Lax grow phase without speedup. if (algType == AlgType.laxWithSpeedup) { int count = 0; int total = ESeeds.size(); // Optimized lax version of grow phase. 
for (Set<Integer> cluster : new HashSet<Set<Integer>>(ESeeds)) { Set<Integer> _cluster = new HashSet<Integer>(cluster); if (extraShuffle) { Collections.shuffle(_variables); } for (int o : _variables) { if (_cluster.contains(o)) continue; List<Integer> _cluster2 = new ArrayList<Integer>(_cluster); int rejected = 0; int accepted = 0; // ChoiceGenerator gen = new ChoiceGenerator(_cluster2.size(), 2); int[] choice; while ((choice = gen.next()) != null) { int n1 = _cluster2.get(choice[0]); int n2 = _cluster2.get(choice[1]); Set<Integer> triple = triple(n1, n2, o); if (!ESeeds.contains(triple)) { rejected++; } else { accepted++; } } // if (rejected > accepted) { continue; } // System.out.println("Adding " + o + " to " + cluster); _cluster.add(o); } for (Set<Integer> c : new HashSet<Set<Integer>>(ESeeds)) { if (_cluster.containsAll(c)) { ESeeds.remove(c); } } if (verbose) { System.out.println("Grown " + (++count) + " of " + total + ": " + _cluster); } grown.add(_cluster); } } // Strict grow phase. if (algType == AlgType.strict) { Set<Integer> t = new HashSet<Integer>(); int count = 0; int total = ESeeds.size(); do { if (!ESeeds.iterator().hasNext()) { break; } Set<Integer> cluster = ESeeds.iterator().next(); Set<Integer> _cluster = new HashSet<Integer>(cluster); if (extraShuffle) { Collections.shuffle(_variables); } VARIABLES: for (int o : _variables) { if (_cluster.contains(o)) continue; List<Integer> _cluster2 = new ArrayList<Integer>(_cluster); ChoiceGenerator gen = new ChoiceGenerator(_cluster2.size(), 2); int[] choice; while ((choice = gen.next()) != null) { int n1 = _cluster2.get(choice[0]); int n2 = _cluster2.get(choice[1]); t.clear(); t.add(n1); t.add(n2); t.add(o); if (!ESeeds.contains(t)) { continue VARIABLES; } // if (avgSumLnP(new ArrayList<Integer>(t)) < -10) continue // CLUSTER; } _cluster.add(o); } // This takes out all pure clusters that are subsets of _cluster. 
ChoiceGenerator gen2 = new ChoiceGenerator(_cluster.size(), 3); int[] choice2; List<Integer> _cluster3 = new ArrayList<Integer>(_cluster); while ((choice2 = gen2.next()) != null) { int n1 = _cluster3.get(choice2[0]); int n2 = _cluster3.get(choice2[1]); int n3 = _cluster3.get(choice2[2]); t.clear(); t.add(n1); t.add(n2); t.add(n3); ESeeds.remove(t); } if (verbose) { System.out.println("Grown " + (++count) + " of " + total + ": " + _cluster); } grown.add(_cluster); } while (!ESeeds.isEmpty()); } // Optimized pick phase. log("Choosing among grown Effect Clusters.", true); for (Set<Integer> l : grown) { ArrayList<Integer> _l = new ArrayList<Integer>(l); Collections.sort(_l); if (verbose) { log("Grown: " + variablesForIndices(_l), false); } } Set<Set<Integer>> out = new HashSet<Set<Integer>>(); List<Set<Integer>> list = new ArrayList<Set<Integer>>(grown); // final Map<Set<Integer>, Double> pValues = new HashMap<Set<Integer>, Double>(); // // for (Set<Integer> o : grown) { // pValues.put(o, getP(new ArrayList<Integer>(o))); // } Collections.sort( list, new Comparator<Set<Integer>>() { @Override public int compare(Set<Integer> o1, Set<Integer> o2) { // if (o1.size() == o2.size()) { // double chisq1 = pValues.get(o1); // double chisq2 = pValues.get(o2); // return Double.compare(chisq2, chisq1); // } return o2.size() - o1.size(); } }); // for (Set<Integer> o : list) { // if (pValues.get(o) < alpha) continue; // System.out.println(variablesForIndices(new ArrayList<Integer>(o)) + " p = " + // pValues.get(o)); // } Set<Integer> all = new HashSet<Integer>(); CLUSTER: for (Set<Integer> cluster : list) { // if (pValues.get(cluster) < alpha) continue; for (Integer i : cluster) { if (all.contains(i)) continue CLUSTER; } out.add(cluster); // if (getPMulticluster(out) < alpha) { // out.remove(cluster); // continue; // } all.addAll(cluster); } return out; }