/** * Calculates the sample likelihood and BIC score for i given its parents in a simple SEM model. */ private double localSemScore(int i, int[] parents) { try { ICovarianceMatrix cov = getCovMatrix(); double varianceY = cov.getValue(i, i); double residualVariance = varianceY; int n = sampleSize(); int p = parents.length; int k = (p * (p + 1)) / 2 + p; // int k = (p + 1) * (p + 1); // int k = p + 1; TetradMatrix covxx = cov.getSelection(parents, parents); TetradMatrix covxxInv = covxx.inverse(); TetradVector covxy = cov.getSelection(parents, new int[] {i}).getColumn(0); TetradVector b = covxxInv.times(covxy); residualVariance -= covxy.dotProduct(b); if (residualVariance <= 0 && verbose) { out.println( "Nonpositive residual varianceY: resVar / varianceY = " + (residualVariance / varianceY)); return Double.NaN; } double c = getPenaltyDiscount(); // return -n * log(residualVariance) - 2 * k; //AIC return -n * Math.log(residualVariance) - c * k * Math.log(n); // return -n * log(residualVariance) - c * k * (log(n) - log(2 * PI)); } catch (Exception e) { e.printStackTrace(); throw new RuntimeException(e); // throwMinimalLinearDependentSet(parents, cov); } }
/**
 * Assembles the error covariance matrix of the model.
 *
 * <p>Rows and columns follow the order of {@code getVariableNodes()}: position {@code a}
 * corresponds to the exogenous (error) node that {@code semGraph} reports for the a-th variable.
 * Diagonal entries are filled from {@code getErrorVariance}; an off-diagonal entry is filled
 * from {@code getErrorCovariance} only when the two error nodes are joined by a bidirected edge
 * in {@code semGraph}. These values are obtained from those accessors, not stored here as
 * parameters. Entries may be {@code Double.NaN} if those accessors cannot produce a value
 * (NOTE(review): depends on the accessors, which are not visible here).
 *
 * @param errorVariances map of error variances; in this method only its size is used, to
 *     dimension the returned square matrix.
 * @return the error covariance matrix.
 */
private TetradMatrix errCovar(Map<Node, Double> errorVariances) {
  List<Node> errorTerms = new ArrayList<Node>();
  for (Node variable : getVariableNodes()) {
    errorTerms.add(semGraph.getExogenous(variable));
  }

  int dimension = errorVariances.size();
  TetradMatrix result = new TetradMatrix(dimension, dimension);
  int count = errorTerms.size();

  // Diagonal: the variance of each error term.
  for (int d = 0; d < count; d++) {
    result.set(d, d, getErrorVariance(errorTerms.get(d)));
  }

  // Off-diagonal: covariances, only where a bidirected edge connects the two error terms.
  for (int row = 0; row < count; row++) {
    Node rowError = errorTerms.get(row);
    for (int col = 0; col < count; col++) {
      Node colError = errorTerms.get(col);
      Edge link = semGraph.getEdge(rowError, colError);
      if (link == null || !Edges.isBidirectedEdge(link)) {
        continue;
      }
      result.set(row, col, getErrorCovariance(rowError, colError));
    }
  }

  return result;
}
/**
 * Extracts the square block of {@code covMatrix} for the variables {@code x}, {@code y} and
 * every node in {@code z}, in that order.
 *
 * @param x node placed at row/column 0 of the result.
 * @param y node placed at row/column 1 of the result.
 * @param z nodes placed at rows/columns 2 and onward, in list order.
 * @return a {@code (z.size() + 2)}-square matrix whose entry (r, c) is the {@code covMatrix}
 *     value for the r-th and c-th selected variables, located via {@code variables.indexOf}.
 */
private TetradMatrix subMatrix(Node x, Node y, List<Node> z) {
  int size = z.size() + 2;
  int[] positions = new int[size];
  positions[0] = variables.indexOf(x);
  positions[1] = variables.indexOf(y);

  int slot = 2;
  for (Node conditioning : z) {
    positions[slot++] = variables.indexOf(conditioning);
  }

  TetradMatrix block = new TetradMatrix(size, size);
  for (int row = 0; row < size; row++) {
    int sourceRow = positions[row];
    for (int col = 0; col < size; col++) {
      block.set(row, col, covMatrix.getDouble(sourceRow, positions[col]));
    }
  }

  return block;
}
/**
 * Builds the edge coefficient matrix of the model from {@code edgeParameters}.
 *
 * <p>For every non-bidirected edge, the stored coefficient is written at
 * {@code [index of edge.getNode1()][index of edge.getNode2()]}, with indices taken from
 * {@code getVariableNodes()}. Callers that need the opposite orientation must transpose the
 * result. Bidirected edges are skipped.
 *
 * @return a square matrix, one row and column per variable node, holding the edge coefficients.
 */
public TetradMatrix edgeCoef() {
  List<Node> nodes = getVariableNodes();
  int size = nodes.size();
  TetradMatrix coefficients = new TetradMatrix(size, size);

  for (Map.Entry<Edge, Double> entry : edgeParameters.entrySet()) {
    Edge edge = entry.getKey();

    if (!Edges.isBidirectedEdge(edge)) {
      int from = nodes.indexOf(edge.getNode1());
      int to = nodes.indexOf(edge.getNode2());
      double value = entry.getValue();
      coefficients.set(from, to, value);
    }
  }

  return coefficients;
}
/** * Takes a Cholesky decomposition from the Cholesky.cholesky method and a set of data simulated * using the information in that matrix. Written by Don Crimbchin. Modified June 8, Matt * Easterday: added a random # seed so that data can be recalculated with the same result in * Causality lab * * @param cholesky the result from cholesky above. * @param randomUtil a random number generator, if null the method will make a new generator for * each random number needed * @return an array the same length as the width or length (cholesky should have the same width * and length) containing a randomly generate data set. */ private double[] exogenousData(TetradMatrix cholesky, RandomUtil randomUtil) { // Step 1. Generate normal samples. double exoData[] = new double[cholesky.rows()]; for (int i = 0; i < exoData.length; i++) { exoData[i] = randomUtil.nextNormal(0, 1); } // Step 2. Multiply by cholesky to get correct covariance. double point[] = new double[exoData.length]; for (int i = 0; i < exoData.length; i++) { double sum = 0.0; for (int j = 0; j <= i; j++) { sum += cholesky.get(i, j) * exoData[j]; } point[i] = sum; } return point; }
/** * @param sampleSize The sample size of the desired data set. * @param latentDataSaved True if latent variables should be included in the data set. * @return This returns a standardized data set simulated from the model, using the reduced form * method. */ public DataSet simulateDataReducedForm(int sampleSize, boolean latentDataSaved) { int numVars = getVariableNodes().size(); // Calculate inv(I - edgeCoef) TetradMatrix edgeCoef = edgeCoef().copy().transpose(); // TetradMatrix iMinusB = TetradAlgebra.identity(edgeCoef.rows()); // iMinusB.assign(edgeCoef, Functions.minus); TetradMatrix iMinusB = TetradAlgebra.identity(edgeCoef.rows()).minus(edgeCoef); TetradMatrix inv = iMinusB.inverse(); // Pick error values e, for each calculate inv * e. TetradMatrix sim = new TetradMatrix(sampleSize, numVars); // Generate error data with the right variances and covariances, then override this // with error data for varaibles that have special distributions defined. Not ideal, // but not sure what else to do at the moment. It's better than not taking covariances // into account! TetradMatrix cholesky = MatrixUtils.choleskyC(errCovar(errorVariances())); for (int i = 0; i < sampleSize; i++) { TetradVector e = new TetradVector(exogenousData(cholesky, RandomUtil.getInstance())); TetradVector ePrime = inv.times(e); sim.assignRow(i, ePrime); // sim.viewRow(i).assign(ePrime); } DataSet fullDataSet = ColtDataSet.makeContinuousData(getVariableNodes(), sim); if (latentDataSaved) { return fullDataSet; } else { return DataUtils.restrictToMeasured(fullDataSet); } }
/** * Constructs a new standardized SEM IM from the freeParameters in the given SEM IM. * * @param im Stop asking me for these things! The given SEM IM!!! * @param initialization CALCULATE_FROM_SEM if the initial values will be calculated from the * given SEM IM; INITIALIZE_FROM_DATA if data will be simulated from the given SEM, * standardized, and estimated. */ public StandardizedSemIm(SemIm im, Initialization initialization) { this.semPm = new SemPm(im.getSemPm()); this.semGraph = new SemGraph(semPm.getGraph()); semGraph.setShowErrorTerms(true); if (semGraph.existsDirectedCycle()) { throw new IllegalArgumentException("The cyclic case is not handled."); } if (initialization == Initialization.CALCULATE_FROM_SEM) { // This code calculates the new coefficients directly from the old ones. edgeParameters = new HashMap<Edge, Double>(); List<Node> nodes = im.getVariableNodes(); TetradMatrix impliedCovar = im.getImplCovar(true); for (Parameter parameter : im.getSemPm().getParameters()) { if (parameter.getType() == ParamType.COEF) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); int aindex = nodes.indexOf(a); int bindex = nodes.indexOf(b); double vara = impliedCovar.get(aindex, aindex); double stda = Math.sqrt(vara); double varb = impliedCovar.get(bindex, bindex); double stdb = Math.sqrt(varb); double oldCoef = im.getEdgeCoef(a, b); double newCoef = (stda / stdb) * oldCoef; edgeParameters.put(Edges.directedEdge(a, b), newCoef); } else if (parameter.getType() == ParamType.COVAR) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); Node exoa = semGraph.getExogenous(a); Node exob = semGraph.getExogenous(b); double covar = im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b)); edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar); } } } else { // This code estimates the new coefficients from simulated data from the old model. 
DataSet dataSet = im.simulateData(1000, false); TetradMatrix _dataSet = dataSet.getDoubleData(); _dataSet = DataUtils.standardizeData(_dataSet); DataSet dataSetStandardized = ColtDataSet.makeData(dataSet.getVariables(), _dataSet); SemEstimator estimator = new SemEstimator(dataSetStandardized, im.getSemPm()); SemIm imStandardized = estimator.estimate(); edgeParameters = new HashMap<Edge, Double>(); for (Parameter parameter : imStandardized.getSemPm().getParameters()) { if (parameter.getType() == ParamType.COEF) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); double coef = imStandardized.getEdgeCoef(a, b); edgeParameters.put(Edges.directedEdge(a, b), coef); } else if (parameter.getType() == ParamType.COVAR) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); Node exoa = semGraph.getExogenous(a); Node exob = semGraph.getExogenous(b); double covar = -im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b)); edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar); } } } this.measuredNodes = Collections.unmodifiableList(semPm.getMeasuredNodes()); }