/**
 * @param sampleSize      the sample size of the desired data set.
 * @param latentDataSaved true if latent variables should be included in the data set.
 * @return a standardized data set simulated from the model, using the reduced form method.
 */
public DataSet simulateDataReducedForm(int sampleSize, boolean latentDataSaved) {
    int numVars = getVariableNodes().size();

    // Calculate inv(I - edgeCoef).
    TetradMatrix edgeCoef = edgeCoef().copy().transpose();
    TetradMatrix iMinusB = TetradAlgebra.identity(edgeCoef.rows()).minus(edgeCoef);
    TetradMatrix inv = iMinusB.inverse();

    // Pick error values e; for each, calculate inv * e.
    TetradMatrix sim = new TetradMatrix(sampleSize, numVars);

    // Generate error data with the right variances and covariances, then override this
    // with error data for variables that have special distributions defined. Not ideal,
    // but better than not taking covariances into account.
    TetradMatrix cholesky = MatrixUtils.choleskyC(errCovar(errorVariances()));

    for (int i = 0; i < sampleSize; i++) {
        TetradVector e = new TetradVector(exogenousData(cholesky, RandomUtil.getInstance()));
        TetradVector ePrime = inv.times(e);
        sim.assignRow(i, ePrime);
    }

    DataSet fullDataSet = ColtDataSet.makeContinuousData(getVariableNodes(), sim);

    if (latentDataSaved) {
        return fullDataSet;
    } else {
        return DataUtils.restrictToMeasured(fullDataSet);
    }
}
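For orientation, a minimal usage sketch. The enclosing class is not shown in this excerpt, so the SemPm/SemIm setup below is an assumption about the surrounding API rather than part of it.

// Hypothetical usage; assumes this method lives on an instantiated SEM model
// (in Tetrad, a SemIm built from a SemPm). The names below are illustrative.
SemIm sem = new SemIm(semPm);
DataSet withLatents = sem.simulateDataReducedForm(1000, true);
DataSet measuredOnly = sem.simulateDataReducedForm(1000, false);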
/**
 * Takes a Cholesky decomposition from the MatrixUtils.choleskyC method and simulates a data
 * point using the information in that matrix. Written by Don Crimbchin. Modified June 8 by
 * Matt Easterday: added a random-number seed so that data can be recalculated with the same
 * result in the Causality lab.
 *
 * @param cholesky   the result of the Cholesky decomposition above.
 * @param randomUtil a random number generator; if null, the method will make a new generator
 *                   for each random number needed.
 * @return an array the same length as the width or height of cholesky (which should be
 *         square) containing a randomly generated data point.
 */
private double[] exogenousData(TetradMatrix cholesky, RandomUtil randomUtil) {
    // Step 1. Generate standard normal samples.
    double[] exoData = new double[cholesky.rows()];

    for (int i = 0; i < exoData.length; i++) {
        exoData[i] = randomUtil.nextNormal(0, 1);
    }

    // Step 2. Multiply by the Cholesky factor to get the correct covariance.
    double[] point = new double[exoData.length];

    for (int i = 0; i < exoData.length; i++) {
        double sum = 0.0;

        for (int j = 0; j <= i; j++) {
            sum += cholesky.get(i, j) * exoData[j];
        }

        point[i] = sum;
    }

    return point;
}
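The two steps implement the standard Cholesky sampling identity: if z is a vector of independent standard normals and sigma = L * L^T with L lower triangular, then L * z has covariance sigma. Below is a self-contained empirical check of that identity, using only java.util.Random and a hand-computed 2x2 factor; none of it is Tetrad code.

import java.util.Random;

/** Demonstrates the Cholesky trick used above: if z ~ N(0, I) and sigma = L * L^T,
 *  then L * z ~ N(0, sigma). Self-contained; does not use the Tetrad classes. */
public class CholeskyDemo {
    public static void main(String[] args) {
        // Target covariance sigma = {{1.0, 0.6}, {0.6, 1.0}}; its lower Cholesky
        // factor is L = {{1.0, 0.0}, {0.6, 0.8}}, since L * L^T = sigma.
        double[][] L = {{1.0, 0.0}, {0.6, 0.8}};

        Random rng = new Random(42);
        int n = 100000;
        double sumXY = 0.0;

        for (int k = 0; k < n; k++) {
            double z0 = rng.nextGaussian();
            double z1 = rng.nextGaussian();

            // The same lower-triangular multiply as Step 2 above.
            double x = L[0][0] * z0;
            double y = L[1][0] * z0 + L[1][1] * z1;

            sumXY += x * y;
        }

        // Both means are zero, so this sample covariance should be close to 0.6.
        System.out.println("empirical cov(x, y) = " + sumXY / n);
    }
}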
public IndTestFisherZPercentIndependent(List<DataSet> dataSets, double alpha) {
    this.dataSets = dataSets;
    this.variables = dataSets.get(0).getVariables();

    // Center each data set and keep its data matrix.
    data = new ArrayList<TetradMatrix>();

    for (DataSet dataSet : dataSets) {
        dataSet = DataUtils.center(dataSet);
        TetradMatrix _data = dataSet.getDoubleData();
        data.add(_data);
    }

    // For centered data, (1 / n) * X^T X is the covariance matrix of X.
    ncov = new ArrayList<TetradMatrix>();

    for (TetradMatrix d : this.data) {
        ncov.add(d.transpose().times(d).scalarMult(1.0 / d.rows()));
    }

    setAlpha(alpha);

    rows = new int[dataSets.get(0).getNumRows()];

    for (int i = 0; i < rows.length; i++) {
        rows[i] = i;
    }

    // Map each variable to its column index for fast lookup.
    variablesMap = new HashMap<Node, Integer>();

    for (int i = 0; i < variables.size(); i++) {
        variablesMap.put(variables.get(i), i);
    }

    // One recursive partial correlation calculator per covariance matrix.
    this.recursivePartialCorrelation = new ArrayList<RecursivePartialCorrelation>();

    for (TetradMatrix covMatrix : ncov) {
        recursivePartialCorrelation.add(
                new RecursivePartialCorrelation(getVariables(), covMatrix));
    }
}
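A hypothetical call, for orientation. It assumes this class implements Tetrad's IndependenceTest interface (an isIndependent(x, y, z) method), which is not shown in this excerpt; dataSets is a List<DataSet> over the same variables, for example one data set per subject or site.

// Hypothetical usage; the isIndependent signature is assumed, not shown above.
IndTestFisherZPercentIndependent test =
        new IndTestFisherZPercentIndependent(dataSets, 0.05);

Node x = test.getVariables().get(0);
Node y = test.getVariables().get(1);
boolean indep = test.isIndependent(x, y, new ArrayList<Node>());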
/**
 * Computes the information matrix, i.e. the Hessian matrix of second-order partial
 * derivatives of the fitting function (4B_2 on page 135 of Bollen), with respect to the free
 * parameters of the estimated SEM. It then computes the inverse of the information matrix and
 * calculates the standard errors of the free parameters as the square roots of its diagonal
 * elements, scaled by 2 / (N - 1).
 *
 * @param estSem the estimated SEM.
 */
public void computeStdErrors(ISemIm estSem) {
    estSem.setParameterBoundsEnforced(false);

    double[] paramsOriginal = estSem.getFreeParamValues();
    double delta;
    FittingFunction fcn = new SemFittingFunction(estSem);
    boolean ridder = false; // Ridder's method is more accurate but a lot slower.

    int n = fcn.getNumParameters();

    // Store the free parameters of the SemIm so that they can be reset to these
    // values; the differentiation methods change them.
    double[] params = new double[n];
    System.arraycopy(paramsOriginal, 0, params, 0, n);

    // If the Ridder method (secondPartialDerivativeRidr) is used to search for the best
    // delta, it is initially set to 0.1. Otherwise the delta is set to 0.005, a value
    // that has worked well for the fitting functions tested to date.
    if (ridder) {
        delta = 0.1;
    } else {
        delta = 0.005;
    }

    // The Hessian matrix of second-order partial derivatives is called the
    // information matrix.
    TetradMatrix hess = new TetradMatrix(n, n);

    List<Parameter> freeParameters = estSem.getFreeParameters();
    boolean containsCovarianceParameter = false;

    for (Parameter p : freeParameters) {
        if (p.getType() == ParamType.COVAR) {
            containsCovarianceParameter = true;
            break;
        }
    }

    for (int i = 0; i < n; i++) {
        for (int j = i; j < n; j++) {
            Parameter pi = freeParameters.get(i);
            Parameter pj = freeParameters.get(j);

            if (!containsCovarianceParameter) {
                // Restrict off-diagonal entries to just collider edge parameters.
                if (i != j
                        && (pi.getType() != ParamType.COEF || pj.getType() != ParamType.COEF)) {
                    continue;
                }

                if (pi.getNodeB() != pj.getNodeB()) {
                    continue;
                }
            }

            double v;

            if (ridder) {
                v = secondPartialDerivativeRidr(fcn, i, j, params, delta);
            } else {
                v = secondPartialDerivative(fcn, i, j, params, delta);
            }

            if (Math.abs(v) < 1e-7) {
                v = 0;
            }

            hess.set(i, j, v);
            hess.set(j, i, v);
        }
    }

    // The diagonal elements of the inverse of the information matrix are proportional to
    // the squared standard errors of the free parameters. Their order is the same as in
    // the array of free parameter values stored in paramsOriginal.
    try {
        TetradMatrix hessInv = hess.inverse();

        stdErrs = new double[n];

        // Hence the standard errors of the free parameters are the square roots of the
        // scaled diagonal elements of the inverse of the information matrix.
        for (int i = 0; i < n; i++) {
            double v = Math.sqrt((2.0 / (estSem.getSampleSize() - 1)) * hessInv.get(i, i));
            stdErrs[i] = (v == 0) ? Double.NaN : v;
        }
    } catch (Exception e) {
        e.printStackTrace();

        // The information matrix could not be inverted; report no standard errors.
        stdErrs = new double[n];

        for (int i = 0; i < n; i++) {
            stdErrs[i] = Double.NaN;
        }
    }

    // Restore the free parameters of the estimated SEM to their original values.
    estSem.setFreeParamValues(paramsOriginal);
    estSem.setParameterBoundsEnforced(true);
}
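The helpers secondPartialDerivative and secondPartialDerivativeRidr are not shown in this excerpt. For reference, the textbook central-difference estimate of a mixed second partial derivative, which is the kind of computation such a helper performs, can be sketched as below; the Fn interface and the class are illustrative only, not Tetrad's API. With the Hessian in hand, the standard errors follow from the code above as sqrt((2 / (N - 1)) * hessInv.get(i, i)).

/** Sketch of a central-difference mixed second partial derivative,
 *  d^2 f / (dx_i dx_j) at the point params, with step size delta.
 *  This is the textbook formula, not necessarily Tetrad's implementation. */
final class FiniteDiff {
    interface Fn {
        double evaluate(double[] params);
    }

    static double secondPartial(Fn f, int i, int j, double[] params, double delta) {
        double[] p = params.clone();

        p[i] += delta;
        p[j] += delta;
        double fpp = f.evaluate(p); // f(..., x_i + d, ..., x_j + d, ...)

        p[j] -= 2 * delta;
        double fpm = f.evaluate(p); // f(..., x_i + d, ..., x_j - d, ...)

        p[i] -= 2 * delta;
        double fmm = f.evaluate(p); // f(..., x_i - d, ..., x_j - d, ...)

        p[j] += 2 * delta;
        double fmp = f.evaluate(p); // f(..., x_i - d, ..., x_j + d, ...)

        // When i == j the increments stack, and this reduces to the pure second
        // derivative (f(x + 2d) - 2 f(x) + f(x - 2d)) / (2d)^2.
        return (fpp - fpm - fmp + fmm) / (4 * delta * delta);
    }
}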
public DataSet simulateDataCholesky(
        int sampleSize, TetradMatrix covar, List<Node> variableNodes) {
    List<Node> variables = new LinkedList<Node>();

    for (Node node : variableNodes) {
        variables.add(node);
    }

    List<Node> newVariables = new ArrayList<Node>();

    for (Node node : variables) {
        ContinuousVariable continuousVariable = new ContinuousVariable(node.getName());
        continuousVariable.setNodeType(node.getNodeType());
        newVariables.add(continuousVariable);
    }

    DataSet fullDataSet = new ColtDataSet(sampleSize, newVariables);
    TetradMatrix cholesky = MatrixUtils.choleskyC(covar);

    // Simulate the data by repeatedly drawing multivariate normal rows using the
    // Cholesky factor, as in exogenousData above. Store only the data for the
    // measured variables.
    for (int row = 0; row < sampleSize; row++) {

        // Step 1. Generate standard normal samples.
        double[] exoData = new double[cholesky.rows()];

        for (int i = 0; i < exoData.length; i++) {
            exoData[i] = RandomUtil.getInstance().nextNormal(0, 1);
        }

        // Step 2. Multiply by the Cholesky factor to get the correct covariance.
        double[] point = new double[exoData.length];

        for (int i = 0; i < exoData.length; i++) {
            double sum = 0.0;

            for (int j = 0; j <= i; j++) {
                sum += cholesky.get(i, j) * exoData[j];
            }

            point[i] = sum;
        }

        for (int col = 0; col < variables.size(); col++) {
            int index = variableNodes.indexOf(variables.get(col));
            double value = point[index];

            if (Double.isNaN(value) || Double.isInfinite(value)) {
                throw new IllegalArgumentException("Value out of range: " + value);
            }

            fullDataSet.setDouble(row, col, value);
        }
    }

    return DataUtils.restrictToMeasured(fullDataSet);
}
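A hypothetical call, for orientation. The covariance matrix must be symmetric positive definite, with rows and columns ordered to match variableNodes; the TetradMatrix-from-double[][] constructor and the setup names are assumptions for illustration.

// Hypothetical usage; setup names are illustrative, not from the excerpt above.
TetradMatrix covar = new TetradMatrix(new double[][]{
        {1.0, 0.3},
        {0.3, 1.0}
}); // assumes a double[][] constructor is available

List<Node> nodes = new ArrayList<Node>();
nodes.add(new ContinuousVariable("X1"));
nodes.add(new ContinuousVariable("X2"));

DataSet data = simulateDataCholesky(500, covar, nodes);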