/**
 * Renders the model as text in three sections: the parameters of directed edges, the parameters of
 * bidirected edges (labelled as error covariances), and the error variance obtained for every error
 * node. A variance that is NaN is printed as "Undefined".
 *
 * @return a string representation of the coefficients and variances of the model.
 */
public String toString() {
  final NumberFormat format = NumberFormatUtil.getInstance().getNumberFormat();
  final StringBuilder text = new StringBuilder();

  text.append("\nStandardized SEM:");

  // Section 1: directed edges only.
  text.append("\n\nEdge coefficients (parameters):\n");
  for (Edge edge : edgeParameters.keySet()) {
    if (Edges.isDirectedEdge(edge)) {
      text.append("\n").append(edge).append(" ").append(format.format(edgeParameters.get(edge)));
    }
  }

  // Section 2: bidirected edges only.
  text.append("\n\nError covariances (parameters):\n");
  for (Edge edge : edgeParameters.keySet()) {
    if (Edges.isBidirectedEdge(edge)) {
      text.append("\n").append(edge).append(" ").append(format.format(edgeParameters.get(edge)));
    }
  }

  // Section 3: one line per error node.
  text.append("\n\nError variances (calculated):\n");
  for (Node error : getErrorNodes()) {
    final double variance = getErrorVariance(error);
    text.append("\n").append(error).append(" ");
    if (Double.isNaN(variance)) {
      text.append("Undefined");
    } else {
      text.append(format.format(variance));
    }
  }

  text.append("\n");
  return text.toString();
}
/** * Checks conditional independence of variable in a continuous data set using Fisher's Z test. See * Spirtes, Glymour, and Scheines, "Causation, Prediction and Search," 2nd edition, page 94. * * @author Joseph Ramsey * @author Frank Wimberly adapted IndTestCramerT for Fisher's Z */ public final class IndTestFisherZShortTriangular implements IndependenceTest { /** The covariance matrix. */ private final ShortTriangularMatrix covMatrix; /** The variables of the covariance matrix, in order. (Unmodifiable list.) */ private List<Node> variables; /** The significance level of the independence tests. */ private double alpha; /** * The value of the Fisher's Z statistic associated with the las calculated partial correlation. */ private double fisherZ; /** * The FisherZD independence test, used when Fisher Z throws an exception (i.e., when there's a * collinearity). */ private IndTestFisherZGeneralizedInverse deterministicTest; /** Formats as 0.0000. */ private static NumberFormat nf = NumberFormatUtil.getInstance().getNumberFormat(); /** Stores a reference to the dataset being analyzed. */ private DataSet dataSet; /** A stored p value, if the deterministic test was used. */ private double pValue = Double.NaN; // ==========================CONSTRUCTORS=============================// /** * Constructs a new Independence test which checks independence facts based on the correlation * matrix implied by the given data set (must be continuous). The given significance level is * used. * * @param dataSet A data set containing only continuous columns. * @param alpha The alpha level of the test. 
*/ public IndTestFisherZShortTriangular(DataSet dataSet, double alpha) { if (!(dataSet.isContinuous())) { throw new IllegalArgumentException("Data set must be continuous."); } this.covMatrix = new ShortTriangularMatrix(dataSet.getNumColumns()); this.covMatrix.becomeCorrelationMatrix(dataSet); this.variables = dataSet.getVariables(); setAlpha(alpha); this.deterministicTest = new IndTestFisherZGeneralizedInverse(dataSet, alpha); this.dataSet = dataSet; } /** * Constructs a new Fisher Z independence test with the listed arguments. * * @param data A 2D continuous data set with no missing values. * @param variables A list of variables, a subset of the variables of <code>data</code>. * @param alpha The significance cutoff level. p values less than alpha will be reported as * dependent. */ public IndTestFisherZShortTriangular(TetradMatrix data, List<Node> variables, double alpha) { DataSet dataSet = ColtDataSet.makeContinuousData(variables, data); this.covMatrix = new ShortTriangularMatrix(dataSet.getNumColumns()); this.covMatrix.becomeCorrelationMatrix(dataSet); this.variables = dataSet.getVariables(); setAlpha(alpha); this.deterministicTest = new IndTestFisherZGeneralizedInverse(dataSet, alpha); } // /** // * Constructs a new independence test that will determine conditional // * independence facts using the given correlation matrix and the given // * significance level. // */ // public IndTestFisherZShortTriangular(CovarianceMatrix corrMatrix, double alpha) { // this.covMatrix = corrMatrix; // this.variables = Collections.unmodifiableList(corrMatrix.getVariables()); // setAlpha(alpha); // } // ==========================PUBLIC METHODS=============================// /** Creates a new IndTestCramerT instance for a subset of the variables. 
*/ public IndependenceTest indTestSubset(List<Node> vars) { // if (vars.isEmpty()) { // throw new IllegalArgumentException("Subset may not be empty."); // } // // for (Node ar : vars) { // if (!variables.contains(ar)) { // throw new IllegalArgumentException( // "All vars must be original vars"); // } // } // // int[] indices = new int[vars.size()]; // // for (int i = 0; i < indices.length; i++) { // indices[i] = variables.indexOf(vars.get(i)); // } // // CovarianceMatrix newCovMatrix = getSubmatrix(indices); // // double alphaNew = getAlpha(); // return new IndTestFisherZShortTriangular(newCovMatrix, alphaNew); throw new UnsupportedOperationException(); } /** * Determines whether variable x is independent of variable y given a list of conditioning * variables z. * * @param x the one variable being compared. * @param y the second variable being compared. * @param z the list of conditioning variables. * @return true iff x _||_ y | z. * @throws RuntimeException if a matrix singularity is encountered. */ public boolean isIndependent(Node x, Node y, List<Node> z) { TetradMatrix submatrix = subMatrix(x, y, z); double r = 0; try { r = StatUtils.partialCorrelation(submatrix); if (Double.isNaN((r)) || r < -1. || r > 1.) throw new RuntimeException(); } catch (Exception e) { DepthChoiceGenerator gen = new DepthChoiceGenerator(z.size(), z.size()); int[] choice; while ((choice = gen.next()) != null) { try { List<Node> z2 = new ArrayList<Node>(z); z2.removeAll(GraphUtils.asList(choice, z)); submatrix = subMatrix(x, y, z2); r = StatUtils.partialCorrelation(submatrix); } catch (Exception e2) { continue; } // if (Double.isNaN(r)) continue; // // if (r > 1.) r = 1.; // if (r < -1.) r = -1.; if (Double.isNaN(r) || r < -1. || r > 1.) 
continue; break; } } // Either dividing by a zero standard deviation (in which case it's dependent) or doing a // regression // (effectively) with a multicolliarity if (Double.isNaN(r)) { int[] _z = new int[z.size()]; // for (int i = 0; i < _z.length; i++) _z[i] = i + 2; // //// double varx = StatUtils.partialVariance(submatrix, 0, _z); // submatrix.get(0, // 0); //// double vary = StatUtils.partialVariance(submatrix, 1, _z); //submatrix.get(1, // 1); // // double varx = submatrix.get(0, 0); // double vary = submatrix.get(1, 1); // // if (varx * vary == 0) { return true; // } } if (r > 1.) r = 1.; if (r < -1.) r = -1.; this.fisherZ = Math.sqrt(sampleSize() - z.size() - 3.0) * 0.5 * (Math.log(1.0 + r) - Math.log(1.0 - r)); if (Double.isNaN(this.fisherZ)) { throw new IllegalArgumentException( "The Fisher's Z " + "score for independence fact " + x + " _||_ " + y + " | " + z + " is undefined. r = " + r); } boolean independent = getPValue() > alpha; if (independent) { TetradLogger.getInstance() .log("independencies", SearchLogUtils.independenceFactMsg(x, y, z, getPValue())); } else { TetradLogger.getInstance() .log("dependencies", SearchLogUtils.dependenceFactMsg(x, y, z, getPValue())); } return independent; } private TetradMatrix subMatrix(Node x, Node y, List<Node> z) { int dim = z.size() + 2; int[] indices = new int[dim]; indices[0] = variables.indexOf(x); indices[1] = variables.indexOf(y); for (int k = 0; k < z.size(); k++) { indices[k + 2] = variables.indexOf(z.get(k)); } TetradMatrix submatrix = new TetradMatrix(dim, dim); for (int i = 0; i < dim; i++) { for (int j = 0; j < dim; j++) { int i1 = indices[i]; int i2 = indices[j]; submatrix.set(i, j, covMatrix.getDouble(i1, i2)); } } return submatrix; } public boolean isIndependent(Node x, Node y, Node... z) { return isIndependent(x, y, Arrays.asList(z)); } public boolean isDependent(Node x, Node y, List<Node> z) { return !isIndependent(x, y, z); } public boolean isDependent(Node x, Node y, Node... 
z) { List<Node> zList = Arrays.asList(z); return isDependent(x, y, zList); } /** @return the probability associated with the most recently computed independence test. */ public double getPValue() { if (!Double.isNaN(this.pValue)) { return Double.NaN; } else { return 2.0 * (1.0 - RandomUtil.getInstance().normalCdf(0, 1, Math.abs(fisherZ))); } } /** * Sets the significance level at which independence judgments should be made. Affects the cutoff * for partial correlations to be considered statistically equal to zero. */ public void setAlpha(double alpha) { if (alpha < 0.0 || alpha > 1.0) { throw new IllegalArgumentException("Significance out of range."); } this.alpha = alpha; // this.thresh = Double.NaN; } /** Gets the getModel significance level. */ public double getAlpha() { return this.alpha; } /** * @return the list of variables over which this independence checker is capable of determinine * independence relations-- that is, all the variables in the given graph or the given data * set. */ public List<Node> getVariables() { return this.variables; } /** @return the variable with the given name. */ public Node getVariable(String name) { for (int i = 0; i < getVariables().size(); i++) { Node variable = getVariables().get(i); if (variable.getName().equals(name)) { return variable; } } return null; } /** @return the list of variable varNames. */ public List<String> getVariableNames() { List<Node> variables = getVariables(); List<String> variableNames = new ArrayList<String>(); for (Node variable1 : variables) { variableNames.add(variable1.getName()); } return variableNames; } /** * If <code>isDeterminismAllowed()</code>, deters to IndTestFisherZD; otherwise throws * UnsupportedOperationException. 
*/ public boolean determines(List<Node> z, Node x) throws UnsupportedOperationException { throw new UnsupportedOperationException(); // int[] parents = new int[z.size()]; // // for (int j = 0; j < parents.length; j++) { // parents[j] = covMatrix.getVariables().indexOf(z.get(j)); // } // // int i = covMatrix.getVariables().indexOf(x); // // TetradMatrix matrix2D = covMatrix.getMatrix(); // double variance = matrix2D.get(i, i); // // if (parents.length > 0) { // // // Regress z onto i, yielding regression coefficients b. // TetradMatrix Czz = // matrix2D.viewSelection(parents, parents); // TetradMatrix inverse; // try { // inverse = TetradAlgebra.inverse(Czz); //// inverse = MatrixUtils.ginverse(Czz); // } // catch (Exception e) { // return true; // } // // TetradVector Cyz = matrix2D.viewColumn(i); // Cyz = Cyz.viewSelection(parents); // TetradVector b = TetradAlgebra.times(inverse, Cyz); // // variance -= TetradAlgebra.times(Cyz, b); // } // // return variance < 0.01; } /** @return the data set being analyzed. */ public DataSet getData() { return dataSet; } @Override public ICovarianceMatrix getCov() { return null; } @Override public List<DataSet> getDataSets() { return null; } @Override public int getSampleSize() { return 0; } @Override public List<TetradMatrix> getCovMatrices() { return null; } public void shuffleVariables() { List<Node> nodes = new ArrayList(this.variables); Collections.shuffle(nodes); this.variables = Collections.unmodifiableList(nodes); } /** @return a string representation of this test. */ public String toString() { return "Fisher's Z, alpha = " + nf.format(getAlpha()); } // ==========================PRIVATE METHODS============================// /** * Computes that value x such that P(abs(N(0,1) > x) < alpha. Note that this is a two sided test * of the null hypothesis that the Fisher's Z value, which is distributed as N(0,1) is not equal * to 0.0. 
*/ private double cutoffGaussian(double alpha) { double upperTail = 1.0 - alpha / 2.0; double epsilon = 1e-14; // Find an upper bound. double lowerBound = -1.0; double upperBound = 0.0; while (ProbUtils.normalCdf(upperBound) < upperTail) { lowerBound += 1.0; upperBound += 1.0; } while (upperBound >= lowerBound + epsilon) { double midPoint = lowerBound + (upperBound - lowerBound) / 2.0; if (ProbUtils.normalCdf(midPoint) <= upperTail) { lowerBound = midPoint; } else { upperBound = midPoint; } } return lowerBound; } private int sampleSize() { return dataSet.getNumRows(); } // private CovarianceMatrix covMatrix() { // return covMatrix; // } }