private Set<Integer> unionPure(Set<Set<Integer>> pureClusters) { Set<Integer> unionPure = new HashSet<Integer>(); for (Set<Integer> cluster : pureClusters) { unionPure.addAll(cluster); } return unionPure; }
private Set<List<Set<Integer>>> combineClusters( Set<Set<Integer>> ESeeds, List<Set<Integer>> CSeeds) { Set<Set<Integer>> EClusters = finishESeeds(ESeeds); Set<Integer> Cs = new HashSet(); for (int i = 0; i < variables.size(); i++) Cs.add(i); Set<Integer> Es = new HashSet(); for (Set<Integer> ECluster : EClusters) Es.addAll(ECluster); Cs.removeAll(Es); List<List<Set<Integer>>> Clusters = new ArrayList(); for (Set<Integer> ECluster : EClusters) { List<Set<Integer>> newCluster = new ArrayList<Set<Integer>>(); newCluster.add(1, ECluster); Clusters.add(newCluster); } List<Set<Integer>> EClustersArray = new ArrayList<Set<Integer>>(); for (Set<Integer> ECluster : EClusters) EClustersArray.add(ECluster); for (Integer c : Cs) { int match = -1; int overlap = 0; boolean pass = false; for (int i = 0; i < EClusters.size(); i++) { Set<Integer> ECluster = EClustersArray.get(i); Set<Integer> intersection = ECluster; intersection.retainAll(CSeeds.get(c)); int _overlap = intersection.size(); if (_overlap > overlap) { overlap = _overlap; match = i; if (overlap / ECluster.size() > CIparameter) { pass = true; } } } if (pass) { List<Set<Integer>> modCluster = new ArrayList<Set<Integer>>(); Set<Integer> newCs = Clusters.get(match).get(0); newCs.add(c); modCluster.add(newCs); modCluster.add(EClustersArray.get(match)); Clusters.set(match, modCluster); } } Set<List<Set<Integer>>> ClusterSet = new HashSet<List<Set<Integer>>>(Clusters); return ClusterSet; }
private Set<Set<Integer>> finishESeeds(Set<Set<Integer>> ESeeds) { log("Growing Effect Seeds.", true); Set<Set<Integer>> grown = new HashSet<Set<Integer>>(); List<Integer> _variables = new ArrayList<Integer>(); for (int i = 0; i < variables.size(); i++) _variables.add(i); // Lax grow phase with speedup. if (algType == AlgType.lax) { Set<Integer> t = new HashSet<Integer>(); int count = 0; int total = ESeeds.size(); do { if (!ESeeds.iterator().hasNext()) { break; } Set<Integer> cluster = ESeeds.iterator().next(); Set<Integer> _cluster = new HashSet<Integer>(cluster); if (extraShuffle) { Collections.shuffle(_variables); } for (int o : _variables) { if (_cluster.contains(o)) continue; List<Integer> _cluster2 = new ArrayList<Integer>(_cluster); int rejected = 0; int accepted = 0; ChoiceGenerator gen = new ChoiceGenerator(_cluster2.size(), 2); int[] choice; while ((choice = gen.next()) != null) { int n1 = _cluster2.get(choice[0]); int n2 = _cluster2.get(choice[1]); t.clear(); t.add(n1); t.add(n2); t.add(o); if (!ESeeds.contains(t)) { rejected++; } else { accepted++; } } if (rejected > accepted) { continue; } _cluster.add(o); // if (!(avgSumLnP(new ArrayList<Integer>(_cluster)) > -10)) { // _cluster.remove(o); // } } // This takes out all pure clusters that are subsets of _cluster. ChoiceGenerator gen2 = new ChoiceGenerator(_cluster.size(), 3); int[] choice2; List<Integer> _cluster3 = new ArrayList<Integer>(_cluster); while ((choice2 = gen2.next()) != null) { int n1 = _cluster3.get(choice2[0]); int n2 = _cluster3.get(choice2[1]); int n3 = _cluster3.get(choice2[2]); t.clear(); t.add(n1); t.add(n2); t.add(n3); ESeeds.remove(t); } if (verbose) { System.out.println( "Grown " + (++count) + " of " + total + ": " + variablesForIndices(new ArrayList<Integer>(_cluster))); } grown.add(_cluster); } while (!ESeeds.isEmpty()); } // Lax grow phase without speedup. if (algType == AlgType.laxWithSpeedup) { int count = 0; int total = ESeeds.size(); // Optimized lax version of grow phase. for (Set<Integer> cluster : new HashSet<Set<Integer>>(ESeeds)) { Set<Integer> _cluster = new HashSet<Integer>(cluster); if (extraShuffle) { Collections.shuffle(_variables); } for (int o : _variables) { if (_cluster.contains(o)) continue; List<Integer> _cluster2 = new ArrayList<Integer>(_cluster); int rejected = 0; int accepted = 0; // ChoiceGenerator gen = new ChoiceGenerator(_cluster2.size(), 2); int[] choice; while ((choice = gen.next()) != null) { int n1 = _cluster2.get(choice[0]); int n2 = _cluster2.get(choice[1]); Set<Integer> triple = triple(n1, n2, o); if (!ESeeds.contains(triple)) { rejected++; } else { accepted++; } } // if (rejected > accepted) { continue; } // System.out.println("Adding " + o + " to " + cluster); _cluster.add(o); } for (Set<Integer> c : new HashSet<Set<Integer>>(ESeeds)) { if (_cluster.containsAll(c)) { ESeeds.remove(c); } } if (verbose) { System.out.println("Grown " + (++count) + " of " + total + ": " + _cluster); } grown.add(_cluster); } } // Strict grow phase. if (algType == AlgType.strict) { Set<Integer> t = new HashSet<Integer>(); int count = 0; int total = ESeeds.size(); do { if (!ESeeds.iterator().hasNext()) { break; } Set<Integer> cluster = ESeeds.iterator().next(); Set<Integer> _cluster = new HashSet<Integer>(cluster); if (extraShuffle) { Collections.shuffle(_variables); } VARIABLES: for (int o : _variables) { if (_cluster.contains(o)) continue; List<Integer> _cluster2 = new ArrayList<Integer>(_cluster); ChoiceGenerator gen = new ChoiceGenerator(_cluster2.size(), 2); int[] choice; while ((choice = gen.next()) != null) { int n1 = _cluster2.get(choice[0]); int n2 = _cluster2.get(choice[1]); t.clear(); t.add(n1); t.add(n2); t.add(o); if (!ESeeds.contains(t)) { continue VARIABLES; } // if (avgSumLnP(new ArrayList<Integer>(t)) < -10) continue // CLUSTER; } _cluster.add(o); } // This takes out all pure clusters that are subsets of _cluster. ChoiceGenerator gen2 = new ChoiceGenerator(_cluster.size(), 3); int[] choice2; List<Integer> _cluster3 = new ArrayList<Integer>(_cluster); while ((choice2 = gen2.next()) != null) { int n1 = _cluster3.get(choice2[0]); int n2 = _cluster3.get(choice2[1]); int n3 = _cluster3.get(choice2[2]); t.clear(); t.add(n1); t.add(n2); t.add(n3); ESeeds.remove(t); } if (verbose) { System.out.println("Grown " + (++count) + " of " + total + ": " + _cluster); } grown.add(_cluster); } while (!ESeeds.isEmpty()); } // Optimized pick phase. log("Choosing among grown Effect Clusters.", true); for (Set<Integer> l : grown) { ArrayList<Integer> _l = new ArrayList<Integer>(l); Collections.sort(_l); if (verbose) { log("Grown: " + variablesForIndices(_l), false); } } Set<Set<Integer>> out = new HashSet<Set<Integer>>(); List<Set<Integer>> list = new ArrayList<Set<Integer>>(grown); // final Map<Set<Integer>, Double> pValues = new HashMap<Set<Integer>, Double>(); // // for (Set<Integer> o : grown) { // pValues.put(o, getP(new ArrayList<Integer>(o))); // } Collections.sort( list, new Comparator<Set<Integer>>() { @Override public int compare(Set<Integer> o1, Set<Integer> o2) { // if (o1.size() == o2.size()) { // double chisq1 = pValues.get(o1); // double chisq2 = pValues.get(o2); // return Double.compare(chisq2, chisq1); // } return o2.size() - o1.size(); } }); // for (Set<Integer> o : list) { // if (pValues.get(o) < alpha) continue; // System.out.println(variablesForIndices(new ArrayList<Integer>(o)) + " p = " + // pValues.get(o)); // } Set<Integer> all = new HashSet<Integer>(); CLUSTER: for (Set<Integer> cluster : list) { // if (pValues.get(cluster) < alpha) continue; for (Integer i : cluster) { if (all.contains(i)) continue CLUSTER; } out.add(cluster); // if (getPMulticluster(out) < alpha) { // out.remove(cluster); // continue; // } all.addAll(cluster); } return out; }
private Void findSeeds() { Tetrad tetrad = null; List<Node> empty = new ArrayList(); if (variables.size() < 4) { Set<Set<Integer>> ESeeds = new HashSet<Set<Integer>>(); } Map<Node, Set<Node>> adjacencies; if (depth == -2) { adjacencies = new HashMap<Node, Set<Node>>(); for (Node node : variables) { HashSet<Node> _nodes = new HashSet<Node>(variables); _nodes.remove(node); adjacencies.put(node, _nodes); } } else { // System.out.println("Running PC adjacency search..."); Graph graph = new EdgeListGraph(variables); Fas fas = new Fas(graph, indTest); fas.setVerbose(false); fas.setDepth(depth); // 1? adjacencies = fas.searchMapOnly(); // System.out.println("...done."); } List<Integer> allVariables = new ArrayList<Integer>(); for (int i = 0; i < variables.size(); i++) allVariables.add(i); log("Finding seeds.", true); ChoiceGenerator gen = new ChoiceGenerator(allVariables.size(), 3); int[] choice; CHOICE: while ((choice = gen.next()) != null) { int n1 = allVariables.get(choice[0]); int n2 = allVariables.get(choice[1]); int n3 = allVariables.get(choice[2]); Node v1 = variables.get(choice[0]); Node v2 = variables.get(choice[1]); Node v3 = variables.get(choice[2]); Set<Integer> triple = triple(n1, n2, n3); if (!clique(triple, adjacencies)) { continue; } boolean EPure = true; boolean CPure1 = true; boolean CPure2 = true; boolean CPure3 = true; for (int o : allVariables) { if (triple.contains(o)) { continue; } Node v4 = variables.get(o); tetrad = new Tetrad(v1, v2, v3, v4); if (deltaTest.getPValue(tetrad) > alpha) { EPure = false; if (indTest.isDependent(v1, v4, empty)) { CPure1 = false; } if (indTest.isDependent(v2, v4, empty)) { CPure2 = false; } } tetrad = new Tetrad(v1, v3, v2, v4); if (deltaTest.getPValue(tetrad) > alpha) { EPure = false; if (indTest.isDependent(v3, v4, empty)) { CPure3 = false; } } if (!(EPure || CPure1 || CPure2 || CPure3)) { continue CHOICE; } } HashSet<Integer> _cluster = new HashSet<Integer>(triple); if (verbose) { log("++" + variablesForIndices(new ArrayList<Integer>(triple)), false); } if (EPure) { ESeeds.add(_cluster); } if (!EPure) { if (CPure1) { Set<Integer> _cluster1 = new HashSet<Integer>(n2, n3); _cluster1.addAll(CSeeds.get(n1)); CSeeds.set(n1, _cluster1); } if (CPure2) { Set<Integer> _cluster2 = new HashSet<Integer>(n1, n3); _cluster2.addAll(CSeeds.get(n2)); CSeeds.set(n2, _cluster2); } if (CPure3) { Set<Integer> _cluster3 = new HashSet<Integer>(n1, n2); _cluster3.addAll(CSeeds.get(n3)); CSeeds.set(n3, _cluster3); } } } return null; }
// Finds clusters of size 3. private Set<Set<Integer>> findMixedClusters( List<Integer> remaining, Set<Integer> unionPure, Map<Node, Set<Node>> adjacencies) { Set<Set<Integer>> threeClusters = new HashSet<Set<Integer>>(); if (unionPure.isEmpty()) { return new HashSet<Set<Integer>>(); } REMAINING: while (true) { if (remaining.size() < 3) break; ChoiceGenerator gen = new ChoiceGenerator(remaining.size(), 3); int[] choice; while ((choice = gen.next()) != null) { int y = remaining.get(choice[0]); int z = remaining.get(choice[1]); int w = remaining.get(choice[2]); Set<Integer> cluster = new HashSet<Integer>(); cluster.add(y); cluster.add(z); cluster.add(w); // if (!allVariablesDependent(cluster)) { // continue; // } if (!clique(cluster, adjacencies)) { continue; } // Check all x as a cross check; really only one should be necessary. boolean allX = true; for (int x : unionPure) { Set<Integer> _cluster = new HashSet<Integer>(cluster); _cluster.add(x); if (!quartetVanishes(_cluster) || !significant(new ArrayList<Integer>(_cluster))) { allX = false; break; } } if (allX) { threeClusters.add(cluster); unionPure.addAll(cluster); remaining.removeAll(cluster); System.out.println( "3-cluster found: " + variablesForIndices(new ArrayList<Integer>(cluster))); continue REMAINING; } } break; } return threeClusters; }
// Trying to optimize the search for 4-cliques a bit. private Set<Set<Integer>> findPureClusters2( List<Integer> _variables, Map<Node, Set<Node>> adjacencies) { System.out.println("Original variables = " + variables); Set<Set<Integer>> clusters = new HashSet<Set<Integer>>(); List<Integer> allVariables = new ArrayList<Integer>(); Set<Node> foundVariables = new HashSet<Node>(); for (int i = 0; i < this.variables.size(); i++) allVariables.add(i); for (int x : _variables) { Node nodeX = variables.get(x); if (foundVariables.contains(nodeX)) continue; List<Node> adjX = new ArrayList<Node>(adjacencies.get(nodeX)); adjX.removeAll(foundVariables); if (adjX.size() < 3) continue; for (Node nodeY : adjX) { if (foundVariables.contains(nodeY)) continue; List<Node> commonXY = new ArrayList<Node>(adjacencies.get(nodeY)); commonXY.retainAll(adjX); commonXY.removeAll(foundVariables); for (Node nodeZ : commonXY) { if (foundVariables.contains(nodeZ)) continue; List<Node> commonXZ = new ArrayList<Node>(commonXY); commonXZ.retainAll(adjacencies.get(nodeZ)); commonXZ.removeAll(foundVariables); for (Node nodeW : commonXZ) { if (foundVariables.contains(nodeW)) continue; if (!adjacencies.get(nodeY).contains(nodeW)) { continue; } int y = variables.indexOf(nodeY); int w = variables.indexOf(nodeW); int z = variables.indexOf(nodeZ); Set<Integer> cluster = quartet(x, y, z, w); // Note that purity needs to be assessed with respect to all of the variables in order // to // remove all latent-measure impurities between pairs of latents. if (pure(cluster, allVariables)) { O: for (int o : _variables) { if (cluster.contains(o)) continue; cluster.add(o); if (!clique(cluster, adjacencies)) { cluster.remove(o); continue O; } // if (!allVariablesDependent(cluster)) { // cluster.remove(o); // continue O; // } List<Integer> _cluster = new ArrayList<Integer>(cluster); ChoiceGenerator gen2 = new ChoiceGenerator(_cluster.size(), 4); int[] choice2; int count = 0; while ((choice2 = gen2.next()) != null) { int x2 = _cluster.get(choice2[0]); int y2 = _cluster.get(choice2[1]); int z2 = _cluster.get(choice2[2]); int w2 = _cluster.get(choice2[3]); Set<Integer> quartet = quartet(x2, y2, z2, w2); // Optimizes for large clusters. if (quartet.contains(o)) { if (++count > 2) continue O; } if (quartet.contains(o) && !pure(quartet, allVariables)) { cluster.remove(o); continue O; } } } System.out.println( "Cluster found: " + variablesForIndices(new ArrayList<Integer>(cluster))); clusters.add(cluster); foundVariables.addAll(variablesForIndices(new ArrayList<Integer>(cluster))); } } } } } return clusters; }