private void calculateArrowsForward(Node x, Node y, Graph graph) { clearArrow(x, y); if (!knowledgeEmpty()) { if (getKnowledge().isForbidden(x.getName(), y.getName())) { return; } } List<Node> naYX = getNaYX(x, y, graph); List<Node> t = getTNeighbors(x, y, graph); DepthChoiceGenerator gen = new DepthChoiceGenerator(t.size(), t.size()); int[] choice; while ((choice = gen.next()) != null) { List<Node> s = GraphUtils.asList(choice, t); if (!knowledgeEmpty()) { if (!validSetByKnowledge(y, s)) { continue; } } double bump = insertEval(x, y, s, naYX, graph); if (bump > 0.0) { Arrow arrow = new Arrow(bump, x, y, s, naYX); sortedArrows.add(arrow); addLookupArrow(x, y, arrow); } } }
/** * Use background knowledge to decide if an insert or delete operation does not orient edges in a * forbidden direction according to prior knowledge. If some orientation is forbidden in the * subset, the whole subset is forbidden. */ private boolean validSetByKnowledge(Node y, List<Node> subset) { for (Node node : subset) { if (getKnowledge().isForbidden(node.getName(), y.getName())) { return false; } } return true; }
private Node getVariable(List<Node> variables, String name) { for (Node node : variables) { if (name.equals(node.getName())) { return node; } } return null; }
private double pValue(Node node, List<Node> parents) { List<Double> _residuals = new ArrayList<Double>(); Node _target = node; List<Node> _regressors = parents; Node target = getVariable(variables, _target.getName()); List<Node> regressors = new ArrayList<Node>(); for (Node _regressor : _regressors) { Node variable = getVariable(variables, _regressor.getName()); regressors.add(variable); } DATASET: for (int m = 0; m < dataSets.size(); m++) { RegressionResult result = regressions.get(m).regress(target, regressors); TetradVector residualsSingleDataset = result.getResiduals(); for (int h = 0; h < residualsSingleDataset.size(); h++) { if (Double.isNaN(residualsSingleDataset.get(h))) { continue DATASET; } } DoubleArrayList _residualsSingleDataset = new DoubleArrayList(residualsSingleDataset.toArray()); double mean = Descriptive.mean(_residualsSingleDataset); double std = Descriptive.standardDeviation( Descriptive.variance( _residualsSingleDataset.size(), Descriptive.sum(_residualsSingleDataset), Descriptive.sumOfSquares(_residualsSingleDataset))); for (int i2 = 0; i2 < _residualsSingleDataset.size(); i2++) { // _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean) / // std); if (isMeanCenterResiduals()) { _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean)); } // _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2))); } for (int k = 0; k < _residualsSingleDataset.size(); k++) { _residuals.add(_residualsSingleDataset.get(k)); } } double[] _f = new double[_residuals.size()]; for (int k = 0; k < _residuals.size(); k++) { _f[k] = _residuals.get(k); } return new AndersonDarlingTest(_f).getP(); }
// Invalid if then nodes or graph changes. private void calculateArrowsBackward(Node x, Node y, Graph graph) { if (x == y) { return; } if (!graph.isAdjacentTo(x, y)) { return; } if (!knowledgeEmpty()) { if (!getKnowledge().noEdgeRequired(x.getName(), y.getName())) { return; } } List<Node> naYX = getNaYX(x, y, graph); clearArrow(x, y); List<Node> _naYX = new ArrayList<Node>(naYX); DepthChoiceGenerator gen = new DepthChoiceGenerator(_naYX.size(), _naYX.size()); int[] choice; while ((choice = gen.next()) != null) { List<Node> H = GraphUtils.asList(choice, _naYX); if (!knowledgeEmpty()) { if (!validSetByKnowledge(y, H)) { continue; } } double bump = deleteEval(x, y, H, naYX, graph); if (bump > 0.0) { Arrow arrow = new Arrow(bump, x, y, H, naYX); sortedArrows.add(arrow); addLookupArrow(x, y, arrow); } } }
private void initializeArrowsBackward(Graph graph) { sortedArrows.clear(); lookupArrows.clear(); for (Edge edge : graph.getEdges()) { Node x = edge.getNode1(); Node y = edge.getNode2(); if (!knowledgeEmpty()) { if (!getKnowledge().noEdgeRequired(x.getName(), y.getName())) { continue; } } if (Edges.isDirectedEdge(edge)) { calculateArrowsBackward(x, y, graph); } else { calculateArrowsBackward(x, y, graph); calculateArrowsBackward(y, x, graph); } } }
private void addRequiredEdges(Graph graph) { if (true) return; if (knowledgeEmpty()) return; for (Iterator<KnowledgeEdge> it = getKnowledge().requiredEdgesIterator(); it.hasNext(); ) { KnowledgeEdge next = it.next(); Node nodeA = graph.getNode(next.getFrom()); Node nodeB = graph.getNode(next.getTo()); if (!graph.isAncestorOf(nodeB, nodeA)) { graph.removeEdges(nodeA, nodeB); graph.addDirectedEdge(nodeA, nodeB); TetradLogger.getInstance() .log("insertedEdges", "Adding edge by knowledge: " + graph.getEdge(nodeA, nodeB)); } } for (Edge edge : graph.getEdges()) { final String A = edge.getNode1().getName(); final String B = edge.getNode2().getName(); if (knowledge.isForbidden(A, B)) { Node nodeA = edge.getNode1(); Node nodeB = edge.getNode2(); if (nodeA != null && nodeB != null && graph.isAdjacentTo(nodeA, nodeB) && !graph.isChildOf(nodeA, nodeB)) { if (!graph.isAncestorOf(nodeA, nodeB)) { graph.removeEdges(nodeA, nodeB); graph.addDirectedEdge(nodeB, nodeA); TetradLogger.getInstance() .log("insertedEdges", "Adding edge by knowledge: " + graph.getEdge(nodeB, nodeA)); } } if (!graph.isChildOf(nodeA, nodeB) && getKnowledge().isForbidden(nodeA.getName(), nodeB.getName())) { if (!graph.isAncestorOf(nodeA, nodeB)) { graph.removeEdges(nodeA, nodeB); graph.addDirectedEdge(nodeB, nodeA); TetradLogger.getInstance() .log("insertedEdges", "Adding edge by knowledge: " + graph.getEdge(nodeB, nodeA)); } } } else if (knowledge.isForbidden(B, A)) { Node nodeA = edge.getNode2(); Node nodeB = edge.getNode1(); if (nodeA != null && nodeB != null && graph.isAdjacentTo(nodeA, nodeB) && !graph.isChildOf(nodeA, nodeB)) { if (!graph.isAncestorOf(nodeA, nodeB)) { graph.removeEdges(nodeA, nodeB); graph.addDirectedEdge(nodeB, nodeA); TetradLogger.getInstance() .log("insertedEdges", "Adding edge by knowledge: " + graph.getEdge(nodeB, nodeA)); } } if (!graph.isChildOf(nodeA, nodeB) && getKnowledge().isForbidden(nodeA.getName(), nodeB.getName())) { if (!graph.isAncestorOf(nodeA, nodeB)) { graph.removeEdges(nodeA, nodeB); graph.addDirectedEdge(nodeB, nodeA); TetradLogger.getInstance() .log("insertedEdges", "Adding edge by knowledge: " + graph.getEdge(nodeB, nodeA)); } } } } }
/** Do an actual deletion (Definition 13 from Chickering, 2002). */ private void delete(Node x, Node y, List<Node> subset, Graph graph, double bump) { Edge trueEdge = null; if (trueGraph != null) { Node _x = trueGraph.getNode(x.getName()); Node _y = trueGraph.getNode(y.getName()); trueEdge = trueGraph.getEdge(_x, _y); } if (log && verbose) { Edge oldEdge = graph.getEdge(x, y); String label = trueGraph != null && trueEdge != null ? "*" : ""; TetradLogger.getInstance() .log( "deletedEdges", (graph.getNumEdges() - 1) + ". DELETE " + oldEdge + " " + subset + " (" + bump + ") " + label); out.println( (graph.getNumEdges() - 1) + ". DELETE " + oldEdge + " " + subset + " (" + bump + ") " + label); } else { int numEdges = graph.getNumEdges() - 1; if (numEdges % 50 == 0) out.println(numEdges); } graph.removeEdge(x, y); for (Node h : subset) { Edge oldEdge = graph.getEdge(y, h); graph.removeEdge(y, h); graph.addDirectedEdge(y, h); if (log) { TetradLogger.getInstance() .log("directedEdges", "--- Directing " + oldEdge + " to " + graph.getEdge(y, h)); } if (verbose) { out.println("--- Directing " + oldEdge + " to " + graph.getEdge(y, h)); } if (Edges.isUndirectedEdge(graph.getEdge(x, h))) { if (!graph.isAdjacentTo(x, h)) throw new IllegalArgumentException("Not adjacent: " + x + ", " + h); oldEdge = graph.getEdge(x, h); graph.removeEdge(x, h); graph.addDirectedEdge(x, h); if (log) { TetradLogger.getInstance() .log("directedEdges", "--- Directing " + oldEdge + " to " + graph.getEdge(x, h)); } if (verbose) { out.println("--- Directing " + oldEdge + " to " + graph.getEdge(x, h)); } } } }
// serial. private void insert(Node x, Node y, List<Node> t, Graph graph, double bump) { if (graph.isAdjacentTo(x, y)) { return; // The initial graph may already have put this edge in the graph. // throw new IllegalArgumentException(x + " and " + y + " are already adjacent in // the graph."); } Edge trueEdge = null; if (trueGraph != null) { Node _x = trueGraph.getNode(x.getName()); Node _y = trueGraph.getNode(y.getName()); trueEdge = trueGraph.getEdge(_x, _y); } graph.addDirectedEdge(x, y); if (log) { String label = trueGraph != null && trueEdge != null ? "*" : ""; TetradLogger.getInstance() .log( "insertedEdges", graph.getNumEdges() + ". INSERT " + graph.getEdge(x, y) + " " + t + " " + bump + " " + label); } else { int numEdges = graph.getNumEdges() - 1; if (verbose) { if (numEdges % 50 == 0) out.println(numEdges); } } if (verbose) { String label = trueGraph != null && trueEdge != null ? "*" : ""; out.println( graph.getNumEdges() + ". INSERT " + graph.getEdge(x, y) + " " + t + " " + bump + " " + label); } else { int numEdges = graph.getNumEdges() - 1; if (verbose) { if (numEdges % 50 == 0) out.println(numEdges); } } for (Node _t : t) { Edge oldEdge = graph.getEdge(_t, y); if (oldEdge == null) throw new IllegalArgumentException("Not adjacent: " + _t + ", " + y); graph.removeEdge(_t, y); graph.addDirectedEdge(_t, y); if (log && verbose) { TetradLogger.getInstance() .log("directedEdges", "--- Directing " + oldEdge + " to " + graph.getEdge(_t, y)); out.println("--- Directing " + oldEdge + " to " + graph.getEdge(_t, y)); } } }
private double andersonDarlingPASquareStarB(Node node, List<Node> parents) { List<Double> _residuals = new ArrayList<Double>(); Node _target = node; List<Node> _regressors = parents; Node target = getVariable(variables, _target.getName()); List<Node> regressors = new ArrayList<Node>(); for (Node _regressor : _regressors) { Node variable = getVariable(variables, _regressor.getName()); regressors.add(variable); } double sum = 0.0; DATASET: for (int m = 0; m < dataSets.size(); m++) { RegressionResult result = regressions.get(m).regress(target, regressors); TetradVector residualsSingleDataset = result.getResiduals(); for (int h = 0; h < residualsSingleDataset.size(); h++) { if (Double.isNaN(residualsSingleDataset.get(h))) { continue DATASET; } } DoubleArrayList _residualsSingleDataset = new DoubleArrayList(residualsSingleDataset.toArray()); double mean = Descriptive.mean(_residualsSingleDataset); double std = Descriptive.standardDeviation( Descriptive.variance( _residualsSingleDataset.size(), Descriptive.sum(_residualsSingleDataset), Descriptive.sumOfSquares(_residualsSingleDataset))); // By centering the individual residual columns, all moments of the mixture become weighted // averages of the moments // of the individual columns. // http://en.wikipedia.org/wiki/Mixture_distribution#Finite_and_countable_mixtures for (int i2 = 0; i2 < _residualsSingleDataset.size(); i2++) { // _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean) / // std); // _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2)) / std); if (isMeanCenterResiduals()) { _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean)); } } double[] _f = new double[_residuals.size()]; for (int k = 0; k < _residuals.size(); k++) { _f[k] = _residuals.get(k); } sum += new AndersonDarlingTest(_f).getASquaredStar(); } return sum / dataSets.size(); }
private double localScoreB(Node node, List<Node> parents) { double score = 0.0; double maxScore = Double.NEGATIVE_INFINITY; Node _target = node; List<Node> _regressors = parents; Node target = getVariable(variables, _target.getName()); List<Node> regressors = new ArrayList<Node>(); for (Node _regressor : _regressors) { Node variable = getVariable(variables, _regressor.getName()); regressors.add(variable); } DATASET: for (int m = 0; m < dataSets.size(); m++) { RegressionResult result = regressions.get(m).regress(target, regressors); TetradVector residualsSingleDataset = result.getResiduals(); DoubleArrayList _residualsSingleDataset = new DoubleArrayList(residualsSingleDataset.toArray()); for (int h = 0; h < residualsSingleDataset.size(); h++) { if (Double.isNaN(residualsSingleDataset.get(h))) { continue DATASET; } } double mean = Descriptive.mean(_residualsSingleDataset); double std = Descriptive.standardDeviation( Descriptive.variance( _residualsSingleDataset.size(), Descriptive.sum(_residualsSingleDataset), Descriptive.sumOfSquares(_residualsSingleDataset))); for (int i2 = 0; i2 < _residualsSingleDataset.size(); i2++) { _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean) / std); } double[] _f = new double[_residualsSingleDataset.size()]; for (int k = 0; k < _residualsSingleDataset.size(); k++) { _f[k] = _residualsSingleDataset.get(k); } DoubleArrayList f = new DoubleArrayList(_f); for (int k = 0; k < f.size(); k++) { f.set(k, Math.abs(f.get(k))); } double _mean = Descriptive.mean(f); double diff = _mean - Math.sqrt(2.0 / Math.PI); score += diff * diff; if (score > maxScore) { maxScore = score; } } double avg = score / dataSets.size(); return avg; }
/** * Calculates the error variance for the given error node, given all of the coefficient values in * the model. * * @param error An error term in the model--i.e. a variable with NodeType.ERROR. * @return The value of the error variance, or Double.NaN is the value is undefined. */ private double calculateErrorVarianceFromParams(Node error) { error = semGraph.getNode(error.getName()); Node child = semGraph.getChildren(error).get(0); List<Node> parents = semGraph.getParents(child); double otherVariance = 0; for (Node parent : parents) { if (parent == error) continue; double coef = getEdgeCoefficient(parent, child); otherVariance += coef * coef; } if (parents.size() >= 2) { ChoiceGenerator gen = new ChoiceGenerator(parents.size(), 2); int[] indices; while ((indices = gen.next()) != null) { Node node1 = parents.get(indices[0]); Node node2 = parents.get(indices[1]); double coef1, coef2; if (node1.getNodeType() != NodeType.ERROR) { coef1 = getEdgeCoefficient(node1, child); } else { coef1 = 1; } if (node2.getNodeType() != NodeType.ERROR) { coef2 = getEdgeCoefficient(node2, child); } else { coef2 = 1; } List<List<Node>> treks = GraphUtils.treksIncludingBidirected(semGraph, node1, node2); double cov = 0.0; for (List<Node> trek : treks) { double product = 1.0; for (int i = 1; i < trek.size(); i++) { Node _node1 = trek.get(i - 1); Node _node2 = trek.get(i); Edge edge = semGraph.getEdge(_node1, _node2); double factor; if (Edges.isBidirectedEdge(edge)) { factor = edgeParameters.get(edge); } else if (!edgeParameters.containsKey(edge)) { factor = 1; } else if (semGraph.isParentOf(_node1, _node2)) { factor = getEdgeCoefficient(_node1, _node2); } else { factor = getEdgeCoefficient(_node2, _node1); } product *= factor; } cov += product; } otherVariance += 2 * coef1 * coef2 * cov; } } return 1.0 - otherVariance <= 0 ? Double.NaN : 1.0 - otherVariance; }