/** * @param node The variable node in question. * @return the error node for the given node. */ public Node getErrorNode(Node node) { if (errorNodes.contains(node)) { return node; } int index = variableNodes.indexOf(node); if (index == -1) { throw new NullPointerException( node + " is not a node in this model. Perhaps " + "it's another node with the same name."); } return errorNodes.get(index); }
/** * @return The edge coefficient matrix of the model, a la SemIm. Note that this will normally need * to be transposed, since [a][b] is the edge coefficient for a-->b, not b-->a. Sorry. * History. THESE ARE PARAMETERS OF THE MODEL--THE ONLY PARAMETERS. */ public TetradMatrix edgeCoef() { List<Node> variableNodes = getVariableNodes(); TetradMatrix edgeCoef = new TetradMatrix(variableNodes.size(), variableNodes.size()); for (Edge edge : edgeParameters.keySet()) { if (Edges.isBidirectedEdge(edge)) { continue; } Node a = edge.getNode1(); Node b = edge.getNode2(); int aindex = variableNodes.indexOf(a); int bindex = variableNodes.indexOf(b); double coef = edgeParameters.get(edge); edgeCoef.set(aindex, bindex, coef); } return edgeCoef; }
private void buildIndexing(Graph graph) { this.hashIndices = new HashMap<Node, Integer>(); for (Node node : graph.getNodes()) { this.hashIndices.put(node, variables.indexOf(node)); } }
/** * Constructs a new standardized SEM IM from the freeParameters in the given SEM IM. * * @param im Stop asking me for these things! The given SEM IM!!! * @param initialization CALCULATE_FROM_SEM if the initial values will be calculated from the * given SEM IM; INITIALIZE_FROM_DATA if data will be simulated from the given SEM, * standardized, and estimated. */ public StandardizedSemIm(SemIm im, Initialization initialization) { this.semPm = new SemPm(im.getSemPm()); this.semGraph = new SemGraph(semPm.getGraph()); semGraph.setShowErrorTerms(true); if (semGraph.existsDirectedCycle()) { throw new IllegalArgumentException("The cyclic case is not handled."); } if (initialization == Initialization.CALCULATE_FROM_SEM) { // This code calculates the new coefficients directly from the old ones. edgeParameters = new HashMap<Edge, Double>(); List<Node> nodes = im.getVariableNodes(); TetradMatrix impliedCovar = im.getImplCovar(true); for (Parameter parameter : im.getSemPm().getParameters()) { if (parameter.getType() == ParamType.COEF) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); int aindex = nodes.indexOf(a); int bindex = nodes.indexOf(b); double vara = impliedCovar.get(aindex, aindex); double stda = Math.sqrt(vara); double varb = impliedCovar.get(bindex, bindex); double stdb = Math.sqrt(varb); double oldCoef = im.getEdgeCoef(a, b); double newCoef = (stda / stdb) * oldCoef; edgeParameters.put(Edges.directedEdge(a, b), newCoef); } else if (parameter.getType() == ParamType.COVAR) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); Node exoa = semGraph.getExogenous(a); Node exob = semGraph.getExogenous(b); double covar = im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b)); edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar); } } } else { // This code estimates the new coefficients from simulated data from the old model. DataSet dataSet = im.simulateData(1000, false); TetradMatrix _dataSet = dataSet.getDoubleData(); _dataSet = DataUtils.standardizeData(_dataSet); DataSet dataSetStandardized = ColtDataSet.makeData(dataSet.getVariables(), _dataSet); SemEstimator estimator = new SemEstimator(dataSetStandardized, im.getSemPm()); SemIm imStandardized = estimator.estimate(); edgeParameters = new HashMap<Edge, Double>(); for (Parameter parameter : imStandardized.getSemPm().getParameters()) { if (parameter.getType() == ParamType.COEF) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); double coef = imStandardized.getEdgeCoef(a, b); edgeParameters.put(Edges.directedEdge(a, b), coef); } else if (parameter.getType() == ParamType.COVAR) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); Node exoa = semGraph.getExogenous(a); Node exob = semGraph.getExogenous(b); double covar = -im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b)); edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar); } } } this.measuredNodes = Collections.unmodifiableList(semPm.getMeasuredNodes()); }
// Trying to optimize the search for 4-cliques a bit. private Set<Set<Integer>> findPureClusters2( List<Integer> _variables, Map<Node, Set<Node>> adjacencies) { System.out.println("Original variables = " + variables); Set<Set<Integer>> clusters = new HashSet<Set<Integer>>(); List<Integer> allVariables = new ArrayList<Integer>(); Set<Node> foundVariables = new HashSet<Node>(); for (int i = 0; i < this.variables.size(); i++) allVariables.add(i); for (int x : _variables) { Node nodeX = variables.get(x); if (foundVariables.contains(nodeX)) continue; List<Node> adjX = new ArrayList<Node>(adjacencies.get(nodeX)); adjX.removeAll(foundVariables); if (adjX.size() < 3) continue; for (Node nodeY : adjX) { if (foundVariables.contains(nodeY)) continue; List<Node> commonXY = new ArrayList<Node>(adjacencies.get(nodeY)); commonXY.retainAll(adjX); commonXY.removeAll(foundVariables); for (Node nodeZ : commonXY) { if (foundVariables.contains(nodeZ)) continue; List<Node> commonXZ = new ArrayList<Node>(commonXY); commonXZ.retainAll(adjacencies.get(nodeZ)); commonXZ.removeAll(foundVariables); for (Node nodeW : commonXZ) { if (foundVariables.contains(nodeW)) continue; if (!adjacencies.get(nodeY).contains(nodeW)) { continue; } int y = variables.indexOf(nodeY); int w = variables.indexOf(nodeW); int z = variables.indexOf(nodeZ); Set<Integer> cluster = quartet(x, y, z, w); // Note that purity needs to be assessed with respect to all of the variables in order // to // remove all latent-measure impurities between pairs of latents. if (pure(cluster, allVariables)) { O: for (int o : _variables) { if (cluster.contains(o)) continue; cluster.add(o); if (!clique(cluster, adjacencies)) { cluster.remove(o); continue O; } // if (!allVariablesDependent(cluster)) { // cluster.remove(o); // continue O; // } List<Integer> _cluster = new ArrayList<Integer>(cluster); ChoiceGenerator gen2 = new ChoiceGenerator(_cluster.size(), 4); int[] choice2; int count = 0; while ((choice2 = gen2.next()) != null) { int x2 = _cluster.get(choice2[0]); int y2 = _cluster.get(choice2[1]); int z2 = _cluster.get(choice2[2]); int w2 = _cluster.get(choice2[3]); Set<Integer> quartet = quartet(x2, y2, z2, w2); // Optimizes for large clusters. if (quartet.contains(o)) { if (++count > 2) continue O; } if (quartet.contains(o) && !pure(quartet, allVariables)) { cluster.remove(o); continue O; } } } System.out.println( "Cluster found: " + variablesForIndices(new ArrayList<Integer>(cluster))); clusters.add(cluster); foundVariables.addAll(variablesForIndices(new ArrayList<Integer>(cluster))); } } } } } return clusters; }
// Finds clusters of size 4 or higher. private Set<Set<Integer>> findPureClusters( List<Integer> _variables, Map<Node, Set<Node>> adjacencies) { // System.out.println("Original variables = " + variables); Set<Set<Integer>> clusters = new HashSet<Set<Integer>>(); List<Integer> allVariables = new ArrayList<Integer>(); for (int i = 0; i < this.variables.size(); i++) allVariables.add(i); VARIABLES: while (!_variables.isEmpty()) { if (_variables.size() < 4) break; for (int x : _variables) { Node nodeX = variables.get(x); List<Node> adjX = new ArrayList<Node>(adjacencies.get(nodeX)); adjX.retainAll(variablesForIndices(new ArrayList<Integer>(_variables))); for (Node node : new ArrayList<Node>(adjX)) { if (adjacencies.get(node).size() < 3) { adjX.remove(node); } } if (adjX.size() < 3) { continue; } ChoiceGenerator gen = new ChoiceGenerator(adjX.size(), 3); int[] choice; while ((choice = gen.next()) != null) { Node nodeY = adjX.get(choice[0]); Node nodeZ = adjX.get(choice[1]); Node nodeW = adjX.get(choice[2]); int y = variables.indexOf(nodeY); int w = variables.indexOf(nodeW); int z = variables.indexOf(nodeZ); Set<Integer> cluster = quartet(x, y, z, w); if (!clique(cluster, adjacencies)) { continue; } // Note that purity needs to be assessed with respect to all of the variables in order to // remove all latent-measure impurities between pairs of latents. if (pure(cluster, allVariables)) { // Collections.shuffle(_variables); O: for (int o : _variables) { if (cluster.contains(o)) continue; cluster.add(o); List<Integer> _cluster = new ArrayList<Integer>(cluster); if (!clique(cluster, adjacencies)) { cluster.remove(o); continue O; } // if (!allVariablesDependent(cluster)) { // cluster.remove(o); // continue O; // } ChoiceGenerator gen2 = new ChoiceGenerator(_cluster.size(), 4); int[] choice2; int count = 0; while ((choice2 = gen2.next()) != null) { int x2 = _cluster.get(choice2[0]); int y2 = _cluster.get(choice2[1]); int z2 = _cluster.get(choice2[2]); int w2 = _cluster.get(choice2[3]); Set<Integer> quartet = quartet(x2, y2, z2, w2); // Optimizes for large clusters. if (quartet.contains(o)) { if (++count > 50) continue O; } if (quartet.contains(o) && !pure(quartet, allVariables)) { cluster.remove(o); continue O; } } } System.out.println( "Cluster found: " + variablesForIndices(new ArrayList<Integer>(cluster))); clusters.add(cluster); _variables.removeAll(cluster); continue VARIABLES; } } } break; } return clusters; }