public boolean isIndependent(Node x, Node y, List<Node> z) {
    int[] all = new int[z.size() + 2];
    all[0] = variablesMap.get(x);
    all[1] = variablesMap.get(y);

    for (int i = 0; i < z.size(); i++) {
        all[i + 2] = variablesMap.get(z.get(i));
    }

    int sampleSize = data.get(0).rows();
    List<Double> pValues = new ArrayList<Double>();

    // Compute a Fisher Z p-value for each covariance matrix in ncov.
    for (int m = 0; m < ncov.size(); m++) {
        TetradMatrix _ncov = ncov.get(m).getSelection(all, all);
        TetradMatrix inv = _ncov.inverse();

        // Partial correlation of x and y given z, from the inverse covariance matrix.
        double r = -inv.get(0, 1) / sqrt(inv.get(0, 0) * inv.get(1, 1));

        double fisherZ = sqrt(sampleSize - z.size() - 3.0)
                * 0.5 * (Math.log(1.0 + r) - Math.log(1.0 - r));
        double pValue;

        if (Double.isInfinite(fisherZ)) {
            pValue = 0;
        } else {
            pValue = 2.0 * (1.0 - RandomUtil.getInstance().normalCdf(0, 1, abs(fisherZ)));
        }

        pValues.add(pValue);
    }

    double _cutoff = alpha;

    if (fdr) {
        _cutoff = StatUtils.fdrCutoff(alpha, pValues, false);
    }

    Collections.sort(pValues);

    // Take the p-value at the (1 - percent) quantile, clamped to the last
    // index to guard the percent == 0 edge case.
    int index = (int) round((1.0 - percent) * pValues.size());
    this.pValue = pValues.get(Math.min(index, pValues.size() - 1));

    boolean independent = this.pValue > _cutoff;

    if (verbose) {
        if (independent) {
            TetradLogger.getInstance().log("independencies",
                    SearchLogUtils.independenceFactMsg(x, y, z, getPValue()));
        } else {
            TetradLogger.getInstance().log("dependencies",
                    SearchLogUtils.dependenceFactMsg(x, y, z, getPValue()));
        }
    }

    return independent;
}
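// A minimal standalone sketch of the Fisher Z step above, assuming Apache
// Commons Math (org.apache.commons.math3) is on the classpath; the helper
// name fisherZPValue is hypothetical, not a Tetrad method. Given a partial
// correlation r, it applies z = sqrt(n - |Z| - 3) * atanh(r) and returns the
// two-sided p-value under the standard normal null.
private static double fisherZPValue(double r, int sampleSize, int conditioningSetSize) {
    org.apache.commons.math3.distribution.NormalDistribution normal =
            new org.apache.commons.math3.distribution.NormalDistribution(0, 1);

    // Fisher's z-transform: 0.5 * log((1 + r) / (1 - r)) = atanh(r).
    double fisherZ = Math.sqrt(sampleSize - conditioningSetSize - 3.0)
            * 0.5 * (Math.log(1.0 + r) - Math.log(1.0 - r));

    // Two-sided p-value: 2 * (1 - Phi(|z|)).
    return 2.0 * (1.0 - normal.cumulativeProbability(Math.abs(fisherZ)));
}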
/**
 * Takes a Cholesky decomposition from the Cholesky.cholesky method and generates a set of data
 * simulated using the information in that matrix. Written by Don Crimbchin. Modified June 8 by
 * Matt Easterday: added a random number seed so that data can be recalculated with the same
 * result in the Causality lab.
 *
 * @param cholesky   the result from cholesky above.
 * @param randomUtil a random number generator; if null, the shared RandomUtil instance is used.
 * @return an array the same length as the width or length of cholesky (which should be square)
 * containing a randomly generated data set.
 */
private double[] exogenousData(TetradMatrix cholesky, RandomUtil randomUtil) {
    if (randomUtil == null) {
        randomUtil = RandomUtil.getInstance();
    }

    // Step 1. Generate iid standard normal samples.
    double[] exoData = new double[cholesky.rows()];

    for (int i = 0; i < exoData.length; i++) {
        exoData[i] = randomUtil.nextNormal(0, 1);
    }

    // Step 2. Multiply by the lower-triangular Cholesky factor to get the correct covariance.
    double[] point = new double[exoData.length];

    for (int i = 0; i < exoData.length; i++) {
        double sum = 0.0;

        for (int j = 0; j <= i; j++) {
            sum += cholesky.get(i, j) * exoData[j];
        }

        point[i] = sum;
    }

    return point;
}
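// A hedged usage sketch for exogenousData, assuming it is called from within
// this class: one correlated draw from a 2x2 covariance. Since L * eps has
// covariance L * L^T = sigma when eps is iid N(0, 1), repeated calls yield
// N(0, sigma) samples. The method name demoExogenousDraw and the local names
// are illustrative only.
private double[] demoExogenousDraw() {
    TetradMatrix sigma = new TetradMatrix(2, 2);
    sigma.set(0, 0, 1.0); sigma.set(0, 1, 0.5);
    sigma.set(1, 0, 0.5); sigma.set(1, 1, 1.0);

    TetradMatrix cholesky = MatrixUtils.choleskyC(sigma);  // lower-triangular factor L
    return exogenousData(cholesky, RandomUtil.getInstance());
}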
/**
 * Computes the information matrix, i.e. the Hessian matrix of second-order partial derivatives
 * of the fitting function (4B_2 on page 135 of Bollen), with respect to the free parameters of
 * the estimated SEM. It then computes the inverse of the information matrix and calculates the
 * standard errors of the free parameters as the square roots of the diagonal elements of that
 * inverse.
 *
 * @param estSem the estimated SEM.
 */
public void computeStdErrors(ISemIm estSem) {
    estSem.setParameterBoundsEnforced(false);

    double[] paramsOriginal = estSem.getFreeParamValues();
    double delta;
    FittingFunction fcn = new SemFittingFunction(estSem);
    boolean ridder = false; // Ridder's method is more accurate but a lot slower.

    int n = fcn.getNumParameters();

    // Store the free parameters of the SemIm so that they can be reset to these
    // values. The differentiation methods change them.
    double[] params = new double[n];
    System.arraycopy(paramsOriginal, 0, params, 0, n);

    // If the Ridder method (secondPartialDerivativeRidr) is used to search for
    // the best delta, it is initially set to 0.1. Otherwise the delta is set to
    // 0.005. That value has worked well for the fitting functions tested to date.
    delta = ridder ? 0.1 : 0.005;

    // The Hessian matrix of second-order partial derivatives is called the
    // information matrix.
    TetradMatrix hess = new TetradMatrix(n, n);

    List<Parameter> freeParameters = estSem.getFreeParameters();
    boolean containsCovarianceParameter = false;

    for (Parameter p : freeParameters) {
        if (p.getType() == ParamType.COVAR) {
            containsCovarianceParameter = true;
            break;
        }
    }

    for (int i = 0; i < n; i++) {
        for (int j = i; j < n; j++) {
            Parameter pi = freeParameters.get(i);
            Parameter pj = freeParameters.get(j);

            if (!containsCovarianceParameter) {
                // Restrict off-diagonal entries to collider edge parameters only.
                if (i != j && (pi.getType() != ParamType.COEF || pj.getType() != ParamType.COEF)) {
                    continue;
                }

                if (pi.getNodeB() != pj.getNodeB()) {
                    continue;
                }
            }

            double v;

            if (ridder) {
                v = secondPartialDerivativeRidr(fcn, i, j, params, delta);
            } else {
                v = secondPartialDerivative(fcn, i, j, params, delta);
            }

            // Zero out numerically negligible entries.
            if (Math.abs(v) < 1e-7) {
                v = 0;
            }

            hess.set(i, j, v);
            hess.set(j, i, v);
        }
    }

    // The diagonal elements of the inverse of the information matrix are proportional
    // to the squares of the standard errors of the free parameters. Their order is the
    // same as in the array of free parameter values stored in paramsOriginal.
    try {
        TetradMatrix hessInv = hess.inverse();
        stdErrs = new double[n];

        // Hence the standard errors of the free parameters are the square roots of the
        // scaled diagonal elements of the inverse of the information matrix.
        for (int i = 0; i < n; i++) {
            double v = Math.sqrt((2.0 / (estSem.getSampleSize() - 1)) * hessInv.get(i, i));
            stdErrs[i] = (v == 0) ? Double.NaN : v;
        }
    } catch (Exception e) {
        e.printStackTrace();

        stdErrs = new double[n];

        for (int i = 0; i < n; i++) {
            stdErrs[i] = Double.NaN;
        }
    }

    // Restore the free parameters of the estimated SEM to their original values.
    estSem.setFreeParamValues(paramsOriginal);
    estSem.setParameterBoundsEnforced(true);
}
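// A minimal sketch of the standard central-difference approximation that a
// helper like secondPartialDerivative could use; this is not necessarily
// Tetrad's implementation, and the name centralSecondPartial is hypothetical.
// It approximates d^2 f / (dx_i dx_j) as
//   [f(++) - f(+-) - f(-+) + f(--)] / (4 * delta^2),
// where +/- denotes perturbing parameter i or j by +delta or -delta.
private static double centralSecondPartial(
        java.util.function.ToDoubleFunction<double[]> f,
        int i, int j, double[] params, double delta) {
    double[] p = params.clone();

    p[i] += delta; p[j] += delta;
    double fpp = f.applyAsDouble(p);   // f(x + delta*e_i + delta*e_j)

    p[j] -= 2 * delta;
    double fpm = f.applyAsDouble(p);   // f(x + delta*e_i - delta*e_j)

    p[i] -= 2 * delta;
    double fmm = f.applyAsDouble(p);   // f(x - delta*e_i - delta*e_j)

    p[j] += 2 * delta;
    double fmp = f.applyAsDouble(p);   // f(x - delta*e_i + delta*e_j)

    return (fpp - fpm - fmp + fmm) / (4.0 * delta * delta);
}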
/**
 * Constructs a new standardized SEM IM from the free parameters in the given SEM IM.
 *
 * @param im             the given SEM IM.
 * @param initialization CALCULATE_FROM_SEM if the initial values will be calculated from the
 *                       given SEM IM; INITIALIZE_FROM_DATA if data will be simulated from the
 *                       given SEM, standardized, and estimated.
 */
public StandardizedSemIm(SemIm im, Initialization initialization) {
    this.semPm = new SemPm(im.getSemPm());
    this.semGraph = new SemGraph(semPm.getGraph());
    semGraph.setShowErrorTerms(true);

    if (semGraph.existsDirectedCycle()) {
        throw new IllegalArgumentException("The cyclic case is not handled.");
    }

    if (initialization == Initialization.CALCULATE_FROM_SEM) {
        // This code calculates the new coefficients directly from the old ones.
        edgeParameters = new HashMap<Edge, Double>();

        List<Node> nodes = im.getVariableNodes();
        TetradMatrix impliedCovar = im.getImplCovar(true);

        for (Parameter parameter : im.getSemPm().getParameters()) {
            if (parameter.getType() == ParamType.COEF) {
                // The coefficient on a --> b is standardized as (sd(a) / sd(b))
                // times the original coefficient, using implied standard deviations.
                Node a = parameter.getNodeA();
                Node b = parameter.getNodeB();
                int aindex = nodes.indexOf(a);
                int bindex = nodes.indexOf(b);
                double vara = impliedCovar.get(aindex, aindex);
                double stda = Math.sqrt(vara);
                double varb = impliedCovar.get(bindex, bindex);
                double stdb = Math.sqrt(varb);
                double oldCoef = im.getEdgeCoef(a, b);
                double newCoef = (stda / stdb) * oldCoef;
                edgeParameters.put(Edges.directedEdge(a, b), newCoef);
            } else if (parameter.getType() == ParamType.COVAR) {
                // Error covariances are standardized to correlations.
                Node a = parameter.getNodeA();
                Node b = parameter.getNodeB();
                Node exoa = semGraph.getExogenous(a);
                Node exob = semGraph.getExogenous(b);
                double covar = im.getErrCovar(a, b)
                        / Math.sqrt(im.getErrVar(a) * im.getErrVar(b));
                edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar);
            }
        }
    } else {
        // This code estimates the new coefficients from data simulated from the old model.
        DataSet dataSet = im.simulateData(1000, false);
        TetradMatrix _dataSet = dataSet.getDoubleData();
        _dataSet = DataUtils.standardizeData(_dataSet);
        DataSet dataSetStandardized = ColtDataSet.makeData(dataSet.getVariables(), _dataSet);

        SemEstimator estimator = new SemEstimator(dataSetStandardized, im.getSemPm());
        SemIm imStandardized = estimator.estimate();

        edgeParameters = new HashMap<Edge, Double>();

        for (Parameter parameter : imStandardized.getSemPm().getParameters()) {
            if (parameter.getType() == ParamType.COEF) {
                Node a = parameter.getNodeA();
                Node b = parameter.getNodeB();
                double coef = imStandardized.getEdgeCoef(a, b);
                edgeParameters.put(Edges.directedEdge(a, b), coef);
            } else if (parameter.getType() == ParamType.COVAR) {
                Node a = parameter.getNodeA();
                Node b = parameter.getNodeB();
                Node exoa = semGraph.getExogenous(a);
                Node exob = semGraph.getExogenous(b);
                // Standardize the error covariance of the estimated standardized
                // model to a correlation, mirroring the branch above.
                double covar = imStandardized.getErrCovar(a, b)
                        / Math.sqrt(imStandardized.getErrVar(a) * imStandardized.getErrVar(b));
                edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar);
            }
        }
    }

    this.measuredNodes = Collections.unmodifiableList(semPm.getMeasuredNodes());
}
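// A worked sketch of the CALCULATE_FROM_SEM rescaling above, with made-up
// numbers: a coefficient on the edge a --> b is standardized as
// (sd(a) / sd(b)) * coef. The method name demoStandardizeCoef is hypothetical.
private static double demoStandardizeCoef() {
    double vara = 4.0;        // implied variance of a, so sd(a) = 2
    double varb = 16.0;       // implied variance of b, so sd(b) = 4
    double oldCoef = 1.5;
    return (Math.sqrt(vara) / Math.sqrt(varb)) * oldCoef;  // (2 / 4) * 1.5 = 0.75
}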
public DataSet simulateDataCholesky(
        int sampleSize, TetradMatrix covar, List<Node> variableNodes) {
    List<Node> variables = new LinkedList<Node>();

    for (Node node : variableNodes) {
        variables.add(node);
    }

    List<Node> newVariables = new ArrayList<Node>();

    for (Node node : variables) {
        ContinuousVariable continuousVariable = new ContinuousVariable(node.getName());
        continuousVariable.setNodeType(node.getNodeType());
        newVariables.add(continuousVariable);
    }

    DataSet fullDataSet = new ColtDataSet(sampleSize, newVariables);
    TetradMatrix cholesky = MatrixUtils.choleskyC(covar);

    // Simulate the data by repeatedly generating exogenous normal samples and
    // multiplying by the Cholesky factor. Store only the data for the measured
    // variables.
    for (int row = 0; row < sampleSize; row++) {
        // Step 1. Generate iid standard normal samples.
        double[] exoData = new double[cholesky.rows()];

        for (int i = 0; i < exoData.length; i++) {
            exoData[i] = RandomUtil.getInstance().nextNormal(0, 1);
        }

        // Step 2. Multiply by the Cholesky factor to get the correct covariance.
        double[] rowData = new double[exoData.length];

        for (int i = 0; i < exoData.length; i++) {
            double sum = 0.0;

            for (int j = 0; j <= i; j++) {
                sum += cholesky.get(i, j) * exoData[j];
            }

            rowData[i] = sum;
        }

        for (int col = 0; col < variables.size(); col++) {
            int index = variableNodes.indexOf(variables.get(col));
            double value = rowData[index];

            if (Double.isNaN(value) || Double.isInfinite(value)) {
                throw new IllegalArgumentException("Value out of range: " + value);
            }

            fullDataSet.setDouble(row, col, value);
        }
    }

    return DataUtils.restrictToMeasured(fullDataSet);
}
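// A hedged usage sketch for simulateDataCholesky, assuming it is called from
// within this class: 100 rows drawn from a 2x2 covariance with correlation 0.3.
// The method name demoSimulate and the variable names are illustrative only.
private DataSet demoSimulate() {
    List<Node> vars = new ArrayList<Node>();
    vars.add(new ContinuousVariable("X1"));
    vars.add(new ContinuousVariable("X2"));

    TetradMatrix cov = new TetradMatrix(2, 2);
    cov.set(0, 0, 1.0); cov.set(0, 1, 0.3);
    cov.set(1, 0, 0.3); cov.set(1, 1, 1.0);

    return simulateDataCholesky(100, cov, vars);
}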