public IndTestFisherZPercentIndependent(List<DataSet> dataSets, double alpha) { this.dataSets = dataSets; this.variables = dataSets.get(0).getVariables(); data = new ArrayList<TetradMatrix>(); for (DataSet dataSet : dataSets) { dataSet = DataUtils.center(dataSet); TetradMatrix _data = dataSet.getDoubleData(); data.add(_data); } ncov = new ArrayList<TetradMatrix>(); for (TetradMatrix d : this.data) ncov.add(d.transpose().times(d).scalarMult(1.0 / d.rows())); setAlpha(alpha); rows = new int[dataSets.get(0).getNumRows()]; for (int i = 0; i < getRows().length; i++) getRows()[i] = i; variablesMap = new HashMap<Node, Integer>(); for (int i = 0; i < variables.size(); i++) { variablesMap.put(variables.get(i), i); } this.recursivePartialCorrelation = new ArrayList<RecursivePartialCorrelation>(); for (TetradMatrix covMatrix : ncov) { recursivePartialCorrelation.add(new RecursivePartialCorrelation(getVariables(), covMatrix)); } }
/** * @return Returns the error covariance matrix of the model. i.e. [a][b] is the covariance of E_a * and E_b, with [a][a] of course being the variance of E_a. THESE ARE NOT PARAMETERS OF THE * MODEL; THEY ARE CALCULATED. Note that elements of this matrix may be Double.NaN; this * indicates that these elements cannot be calculated. */ private TetradMatrix errCovar(Map<Node, Double> errorVariances) { List<Node> variableNodes = getVariableNodes(); List<Node> errorNodes = new ArrayList<Node>(); for (Node node : variableNodes) { errorNodes.add(semGraph.getExogenous(node)); } TetradMatrix errorCovar = new TetradMatrix(errorVariances.size(), errorVariances.size()); for (int index = 0; index < errorNodes.size(); index++) { Node error = errorNodes.get(index); double variance = getErrorVariance(error); errorCovar.set(index, index, variance); } for (int index1 = 0; index1 < errorNodes.size(); index1++) { for (int index2 = 0; index2 < errorNodes.size(); index2++) { Node error1 = errorNodes.get(index1); Node error2 = errorNodes.get(index2); Edge edge = semGraph.getEdge(error1, error2); if (edge != null && Edges.isBidirectedEdge(edge)) { double covariance = getErrorCovariance(error1, error2); errorCovar.set(index1, index2, covariance); } } } return errorCovar; }
/** * Calculates the sample likelihood and BIC score for i given its parents in a simple SEM model. */ private double localSemScore(int i, int[] parents) { try { ICovarianceMatrix cov = getCovMatrix(); double varianceY = cov.getValue(i, i); double residualVariance = varianceY; int n = sampleSize(); int p = parents.length; int k = (p * (p + 1)) / 2 + p; // int k = (p + 1) * (p + 1); // int k = p + 1; TetradMatrix covxx = cov.getSelection(parents, parents); TetradMatrix covxxInv = covxx.inverse(); TetradVector covxy = cov.getSelection(parents, new int[] {i}).getColumn(0); TetradVector b = covxxInv.times(covxy); residualVariance -= covxy.dotProduct(b); if (residualVariance <= 0 && verbose) { out.println( "Nonpositive residual varianceY: resVar / varianceY = " + (residualVariance / varianceY)); return Double.NaN; } double c = getPenaltyDiscount(); // return -n * log(residualVariance) - 2 * k; //AIC return -n * Math.log(residualVariance) - c * k * Math.log(n); // return -n * log(residualVariance) - c * k * (log(n) - log(2 * PI)); } catch (Exception e) { e.printStackTrace(); throw new RuntimeException(e); // throwMinimalLinearDependentSet(parents, cov); } }
private TetradMatrix subMatrix(Node x, Node y, List<Node> z) { int dim = z.size() + 2; int[] indices = new int[dim]; indices[0] = variables.indexOf(x); indices[1] = variables.indexOf(y); for (int k = 0; k < z.size(); k++) { indices[k + 2] = variables.indexOf(z.get(k)); } TetradMatrix submatrix = new TetradMatrix(dim, dim); for (int i = 0; i < dim; i++) { for (int j = 0; j < dim; j++) { int i1 = indices[i]; int i2 = indices[j]; submatrix.set(i, j, covMatrix.getDouble(i1, i2)); } } return submatrix; }
public boolean isIndependent(Node x, Node y, List<Node> z) { int[] all = new int[z.size() + 2]; all[0] = variablesMap.get(x); all[1] = variablesMap.get(y); for (int i = 0; i < z.size(); i++) { all[i + 2] = variablesMap.get(z.get(i)); } int sampleSize = data.get(0).rows(); List<Double> pValues = new ArrayList<Double>(); for (int m = 0; m < ncov.size(); m++) { TetradMatrix _ncov = ncov.get(m).getSelection(all, all); TetradMatrix inv = _ncov.inverse(); double r = -inv.get(0, 1) / sqrt(inv.get(0, 0) * inv.get(1, 1)); double fisherZ = sqrt(sampleSize - z.size() - 3.0) * 0.5 * (Math.log(1.0 + r) - Math.log(1.0 - r)); double pValue; if (Double.isInfinite(fisherZ)) { pValue = 0; } else { pValue = 2.0 * (1.0 - RandomUtil.getInstance().normalCdf(0, 1, abs(fisherZ))); } pValues.add(pValue); } double _cutoff = alpha; if (fdr) { _cutoff = StatUtils.fdrCutoff(alpha, pValues, false); } Collections.sort(pValues); int index = (int) round((1.0 - percent) * pValues.size()); this.pValue = pValues.get(index); // if (this.pValue == 0) { // System.out.println("Zero pvalue "+ SearchLogUtils.independenceFactMsg(x, y, z, // getPValue())); // } boolean independent = this.pValue > _cutoff; if (verbose) { if (independent) { TetradLogger.getInstance() .log("independencies", SearchLogUtils.independenceFactMsg(x, y, z, getPValue())); // System.out.println(SearchLogUtils.independenceFactMsg(x, y, z, getPValue())); } else { TetradLogger.getInstance() .log("dependencies", SearchLogUtils.dependenceFactMsg(x, y, z, getPValue())); } } return independent; }
/** * @return The edge coefficient matrix of the model, a la SemIm. Note that this will normally need * to be transposed, since [a][b] is the edge coefficient for a-->b, not b-->a. Sorry. * History. THESE ARE PARAMETERS OF THE MODEL--THE ONLY PARAMETERS. */ public TetradMatrix edgeCoef() { List<Node> variableNodes = getVariableNodes(); TetradMatrix edgeCoef = new TetradMatrix(variableNodes.size(), variableNodes.size()); for (Edge edge : edgeParameters.keySet()) { if (Edges.isBidirectedEdge(edge)) { continue; } Node a = edge.getNode1(); Node b = edge.getNode2(); int aindex = variableNodes.indexOf(a); int bindex = variableNodes.indexOf(b); double coef = edgeParameters.get(edge); edgeCoef.set(aindex, bindex, coef); } return edgeCoef; }
/** * Takes a Cholesky decomposition from the Cholesky.cholesky method and a set of data simulated * using the information in that matrix. Written by Don Crimbchin. Modified June 8, Matt * Easterday: added a random # seed so that data can be recalculated with the same result in * Causality lab * * @param cholesky the result from cholesky above. * @param randomUtil a random number generator, if null the method will make a new generator for * each random number needed * @return an array the same length as the width or length (cholesky should have the same width * and length) containing a randomly generate data set. */ private double[] exogenousData(TetradMatrix cholesky, RandomUtil randomUtil) { // Step 1. Generate normal samples. double exoData[] = new double[cholesky.rows()]; for (int i = 0; i < exoData.length; i++) { exoData[i] = randomUtil.nextNormal(0, 1); } // Step 2. Multiply by cholesky to get correct covariance. double point[] = new double[exoData.length]; for (int i = 0; i < exoData.length; i++) { double sum = 0.0; for (int j = 0; j <= i; j++) { sum += cholesky.get(i, j) * exoData[j]; } point[i] = sum; } return point; }
// Prints a smallest subset of parents that causes a singular matrix exception. private void printMinimalLinearlyDependentSet(int[] parents, ICovarianceMatrix cov) { List<Node> _parents = new ArrayList<>(); for (int p : parents) _parents.add(variables.get(p)); DepthChoiceGenerator gen = new DepthChoiceGenerator(_parents.size(), _parents.size()); int[] choice; while ((choice = gen.next()) != null) { int[] sel = new int[choice.length]; List<Node> _sel = new ArrayList<>(); for (int m = 0; m < choice.length; m++) { sel[m] = parents[m]; _sel.add(variables.get(sel[m])); } TetradMatrix m = cov.getSelection(sel, sel); try { m.inverse(); } catch (Exception e2) { out.println("### Linear dependence among variables: " + _sel); } } }
/** * @param sampleSize The sample size of the desired data set. * @param latentDataSaved True if latent variables should be included in the data set. * @return This returns a standardized data set simulated from the model, using the reduced form * method. */ public DataSet simulateDataReducedForm(int sampleSize, boolean latentDataSaved) { int numVars = getVariableNodes().size(); // Calculate inv(I - edgeCoef) TetradMatrix edgeCoef = edgeCoef().copy().transpose(); // TetradMatrix iMinusB = TetradAlgebra.identity(edgeCoef.rows()); // iMinusB.assign(edgeCoef, Functions.minus); TetradMatrix iMinusB = TetradAlgebra.identity(edgeCoef.rows()).minus(edgeCoef); TetradMatrix inv = iMinusB.inverse(); // Pick error values e, for each calculate inv * e. TetradMatrix sim = new TetradMatrix(sampleSize, numVars); // Generate error data with the right variances and covariances, then override this // with error data for varaibles that have special distributions defined. Not ideal, // but not sure what else to do at the moment. It's better than not taking covariances // into account! TetradMatrix cholesky = MatrixUtils.choleskyC(errCovar(errorVariances())); for (int i = 0; i < sampleSize; i++) { TetradVector e = new TetradVector(exogenousData(cholesky, RandomUtil.getInstance())); TetradVector ePrime = inv.times(e); sim.assignRow(i, ePrime); // sim.viewRow(i).assign(ePrime); } DataSet fullDataSet = ColtDataSet.makeContinuousData(getVariableNodes(), sim); if (latentDataSaved) { return fullDataSet; } else { return DataUtils.restrictToMeasured(fullDataSet); } }
/** * This method computes the information matrix or Hessian matrix of second order partial * derivatives of the fitting function (4B_2 on page 135 of Bollen) with respect to the free * freeParameters of the estimated SEM. It then computes the inverse of the the information matrix * and calculates the standard errors of the freeParameters as the square roots of the diagonal * elements of that matrix. * * @param estSem the estimated SEM. */ public void computeStdErrors(ISemIm estSem) { // if (!unmeasuredLatents(estSem.getSemPm()).isEmpty()) { // int n = estSem.getFreeParameters().size(); // stdErrs = new double[n]; // // for (int i = 0; i < n; i++) { // stdErrs[i] = Double.NaN; // } // // return; // } // this.semIm = estSem; estSem.setParameterBoundsEnforced(false); double[] paramsOriginal = estSem.getFreeParamValues(); double delta; FittingFunction fcn = new SemFittingFunction(estSem); boolean ridder = false; // Ridder is more accurate but a lot slower. int n = fcn.getNumParameters(); // Store the free freeParameters of the SemIm so that they can be reset to these // values. The differentiation methods change them. double[] params = new double[n]; System.arraycopy(paramsOriginal, 0, params, 0, n); // If the Ridder method (secondPartialDerivativeRidr) is used to search for // the best delta it is initially set to 0.1. Otherwise the delta is set to // 0.005. That value has worked well for those fitting functions tested to // date. if (ridder) { delta = 0.1; } else { delta = 0.005; } // The Hessian matrix of second order partial derivatives is called the // information matrix. TetradMatrix hess = new TetradMatrix(n, n); List<Parameter> freeParameters = estSem.getFreeParameters(); boolean containsCovararianceParameter = false; for (Parameter p : freeParameters) { if (p.getType() == ParamType.COVAR) { containsCovararianceParameter = true; break; } } for (int i = 0; i < n; i++) { for (int j = i; j < n; j++) { Parameter pi = freeParameters.get(i); Parameter pj = freeParameters.get(j); if (!containsCovararianceParameter) { // Restrict off-diagonal to just collider edge freeParameters. if (i != j && (pi.getType() != ParamType.COEF || pj.getType() != ParamType.COEF)) { continue; } if (pi.getNodeB() != pj.getNodeB()) { continue; } } double v; if (ridder) { v = secondPartialDerivativeRidr(fcn, i, j, params, delta); } else { v = secondPartialDerivative(fcn, i, j, params, delta); } if (Math.abs(v) < 1e-7) { v = 0; } // if (Double.isNaN(v)) { // v = 0; // } hess.set(i, j, v); hess.set(j, i, v); } } ROWS: for (int i = 0; i < hess.rows(); i++) { for (int j = 0; j < hess.columns(); j++) { if (hess.get(i, j) != 0) { continue ROWS; } } // System.out.println("Zero row for " + freeParameters.get(i)); } // The diagonal elements of the inverse of the information matrix are the // squares of the standard errors of the freeParameters. Their order is the // same as in the array of free parameter values stored in paramsOriginal. try { TetradMatrix hessInv = hess.inverse(); // TetradMatrix hessInv = hess.ginverse(); // System.out.println("Inverse: " + hessInv); // for (int i = 0; i < freeParameters.size(); i++) { // System.out.println(i + " = " + freeParameters.get(i)); // } stdErrs = new double[n]; // Hence the standard errors of the freeParameters are the square roots of the // diagonal elements of the inverse of the information matrix. for (int i = 0; i < n; i++) { double v = Math.sqrt((2.0 / (estSem.getSampleSize() - 1)) * hessInv.get(i, i)); if (v == 0) { System.out.println("v = " + v + " hessInv(i, i) = " + hessInv.get(i, i)); } if (v == 0) { stdErrs[i] = Double.NaN; } else { stdErrs[i] = v; } } } catch (Exception e) { e.printStackTrace(); stdErrs = new double[n]; for (int i = 0; i < n; i++) { stdErrs[i] = Double.NaN; } } // Restore the freeParameters of the estimated SEM to their original values. estSem.setFreeParamValues(paramsOriginal); estSem.setParameterBoundsEnforced(true); }
/** * Takes a list of tetrads for the given data set and returns the chi square value for the test. * We assume that the tetrads are non-redundant; if not, a matrix exception will be thrown. * * <p>Calculates the T statistic (Bollen and Ting, p. 161). This is significant if tests as * significant using the Chi Square distribution with degrees of freedom equal to the number of * nonredundant tetrads tested. */ @Override // public double calcChiSquare1(Sextad... sextads) { // this.storedSextads = sextads; // // this.df = sextads.length; // // // Need a list of symbolic covariances--i.e. covariances that appear in tetrads. // Set<Sigma> boldSigmaSet = new LinkedHashSet<Sigma>(); // List<Sigma> boldSigma = new ArrayList<Sigma>(); // // for (Sextad sextad : sextads) { // boldSigmaSet.add(new Sigma(sextad.getI(), sextad.getL())); // boldSigmaSet.add(new Sigma(sextad.getI(), sextad.getM())); // boldSigmaSet.add(new Sigma(sextad.getI(), sextad.getN())); // // boldSigmaSet.add(new Sigma(sextad.getJ(), sextad.getL())); // boldSigmaSet.add(new Sigma(sextad.getJ(), sextad.getM())); // boldSigmaSet.add(new Sigma(sextad.getJ(), sextad.getN())); // // boldSigmaSet.add(new Sigma(sextad.getK(), sextad.getL())); // boldSigmaSet.add(new Sigma(sextad.getK(), sextad.getM())); // boldSigmaSet.add(new Sigma(sextad.getK(), sextad.getN())); // } // // for (Sigma sigma : boldSigmaSet) { // boldSigma.add(sigma); // } // // // Need a matrix of variances and covariances of sample covariances. // TetradMatrix sigma_ss = TetradMatrix.instance(boldSigma.size(), boldSigma.size()); // // for (int i = 0; i < boldSigma.size(); i++) { // for (int j = 0; j < boldSigma.size(); j++) { // Sigma sigmaef = boldSigma.get(i); // Sigma sigmagh = boldSigma.get(j); // // Node e = sigmaef.getA(); // Node f = sigmaef.getB(); // Node g = sigmagh.getA(); // Node h = sigmagh.getB(); // // if (cov != null && cov instanceof CorrelationMatrix) { // //// Assumes multinormality. Using formula 23. (Not implementing formula 22 because // that case //// does not come up.) // double rr = 0.5 * (r(e, f) * r(g, h)) // * (r(e, g) * r(e, g) + r(e, h) * r(e, h) + r(f, g) * r(f, g) + r(f, // h) * r(f, h)) // + r(e, g) * r(f, h) + r(e, h) * r(f, g) // - r(e, f) * (r(f, g) * r(f, h) + r(e, g) * r(e, h)) // - r(g, h) * (r(f, g) * r(e, g) + r(f, h) * r(e, h)); // // sigma_ss.set(i, j, rr); // } else if (cov != null && dataSet == null) { // // // Assumes multinormality--see p. 160. // double _ss = r(e, g) * r(f, h) + r(e, h) * r(f, g); // + or -? Different // advise. + in the code. // sigma_ss.set(i, j, _ss); // } else { // double _ss = sxyzw(e, f, g, h) - r(e, f) * r(g, h); // sigma_ss.set(i, j, _ss); // } // } // } // // // Need a matrix of of population estimates of partial derivatives of tetrads // // with respect to covariances in boldSigma. // TetradMatrix del = TetradMatrix.instance(boldSigma.size(), sextads.length); // // for (int i = 0; i < boldSigma.size(); i++) { // for (int j = 0; j < sextads.length; j++) { // Sigma sigma = boldSigma.get(i); // Sextad sextad = sextads[j]; // // Node m1 = sextad.getI(); // Node m2 = sextad.getJ(); // Node m3 = sextad.getK(); // Node m4 = sextad.getL(); // Node m5 = sextad.getM(); // Node m6 = sextad.getN(); // // double derivative = getDerivative(m1, m2, m3, m4, m5, m6, sigma.getA(), // sigma.getB()); // del.set(i, j, derivative); // } // } // // // Need a vector of population estimates of the sextads. // TetradMatrix t = TetradMatrix.instance(sextads.length, 1); // // for (int i = 0; i < sextads.length; i++) { // Sextad sextad = sextads[i]; // // List<Node> nodes = new ArrayList<Node>(); // // nodes.add(sextad.getI()); // nodes.add(sextad.getJ()); // nodes.add(sextad.getK()); // nodes.add(sextad.getL()); // nodes.add(sextad.getM()); // nodes.add(sextad.getN()); // // TetradMatrix m = TetradMatrix.instance(3, 3); // // for (int k1 = 0; k1 < 3; k1++) { // for (int k2 = 0; k2 < 3; k2++) { // m.set(k1, k2, r(nodes.get(k1), nodes.get(3+k2))); // } // } // // double value = TetradAlgebra.det(m); // t.set(i, 0, value); // this.storedValue = value; // } // // // Now multiply to get Sigma_tt // TetradMatrix w1 = TetradAlgebra.times(del.transpose(), sigma_ss); // TetradMatrix sigma_tt = TetradAlgebra.times(w1, del); // // // And now invert and multiply to get T. // TetradMatrix v0 = TetradAlgebra.inverse(sigma_tt); // TetradMatrix v1 = TetradAlgebra.times(t.transpose(), v0); // TetradMatrix v2 = TetradAlgebra.times(v1, t); // double chisq = N * v2.get(0, 0); // // this.chisq = chisq; // return chisq; // } public double calcChiSquare(Sextad... sextads) { this.storedSextads = sextads; this.df = 4; // sextads.length; List<Sigma> boldSigma = new ArrayList<Sigma>(); List<Node> _nodes = new ArrayList<Node>(sextads[0].getNodes()); for (int i = 0; i < _nodes.size(); i++) { for (int j = i + 1; j < _nodes.size(); j++) { boldSigma.add(new Sigma(_nodes.get(i), _nodes.get(j))); } } // Need a matrix of variances and covariances of sample covariances. TetradMatrix sigma_ss = new TetradMatrix(boldSigma.size(), boldSigma.size()); for (int i = 0; i < boldSigma.size(); i++) { for (int j = i; j < boldSigma.size(); j++) { Sigma sigmaef = boldSigma.get(i); Sigma sigmagh = boldSigma.get(j); Node e = sigmaef.getA(); Node f = sigmaef.getB(); Node g = sigmagh.getA(); Node h = sigmagh.getB(); if (cov != null && cov instanceof CorrelationMatrix) { // Assumes multinormality. Using formula 23. (Not implementing formula 22 // because that case // does not come up.) double rr = 0.5 * (r(e, f) * r(g, h)) * (r(e, g) * r(e, g) + r(e, h) * r(e, h) + r(f, g) * r(f, g) + r(f, h) * r(f, h)) + r(e, g) * r(f, h) + r(e, h) * r(f, g) - r(e, f) * (r(f, g) * r(f, h) + r(e, g) * r(e, h)) - r(g, h) * (r(f, g) * r(e, g) + r(f, h) * r(e, h)); sigma_ss.set(i, j, rr); sigma_ss.set(j, i, rr); } else if (cov != null && dataSet == null) { // Assumes multinormality--see p. 160. double _ss = r(e, g) * r(f, h) + r(e, h) * r(f, g); // + or -? Different advise. + in the code. // double _ss = r(e, g) * r(f, h) - r(e, h) * r(g, f); // shouldn't // this be a tetrad? sigma_ss.set(i, j, _ss); sigma_ss.set(j, i, _ss); } else { double _ss = sxyzw(e, f, g, h) - r(e, f) * r(g, h); sigma_ss.set(i, j, _ss); sigma_ss.set(j, i, _ss); } } } // Need a matrix of of population estimates of partial derivatives of tetrads // with respect to covariances in boldSigma. TetradMatrix del = new TetradMatrix(boldSigma.size(), sextads.length); for (int j = 0; j < sextads.length; j++) { Sextad sextad = sextads[j]; for (int i = 0; i < boldSigma.size(); i++) { Sigma sigma = boldSigma.get(i); double derivative = getDerivative(sextad, sigma); del.set(i, j, derivative); } } // Need a vector of population estimates of the sextads. TetradMatrix t = new TetradMatrix(sextads.length, 1); for (int i = 0; i < sextads.length; i++) { Sextad sextad = sextads[i]; List<Node> nodes = sextad.getNodes(); TetradMatrix m = new TetradMatrix(3, 3); for (int k1 = 0; k1 < 3; k1++) { for (int k2 = 0; k2 < 3; k2++) { m.set(k1, k2, r(nodes.get(k1), nodes.get(3 + k2))); } } double det = m.det(); t.set(i, 0, det); this.storedValue = det; // ? } // for (int i = 0; i < sextads.length; i++) { // Sextad sextad = sextads[i]; // // List<Node> nodes = new ArrayList<Node>(); // // nodes.add(sextad.getI()); // nodes.add(sextad.getJ()); // nodes.add(sextad.getK()); // nodes.add(sextad.getL()); // nodes.add(sextad.getM()); // nodes.add(sextad.getN()); // // TetradMatrix m = TetradMatrix.instance(3, 3); // // for (int k1 = 0; k1 < 3; k1++) { // for (int k2 = 0; k2 < 3; k2++) { // m.set(k1, k2, r(nodes.get(k1), nodes.get(3+k2))); // } // } // // double value = TetradAlgebra.det(m); // t.set(i, 0, value); // this.storedValue = value; // } TetradMatrix sigma_tt = del.transpose().times(sigma_ss).times(del); try { this.chisq = N * t.transpose().times(sigma_tt.inverse()).times(t).get(0, 0); return chisq; } catch (Exception e) { return Double.NaN; } }
/** * Constructs a new standardized SEM IM from the freeParameters in the given SEM IM. * * @param im Stop asking me for these things! The given SEM IM!!! * @param initialization CALCULATE_FROM_SEM if the initial values will be calculated from the * given SEM IM; INITIALIZE_FROM_DATA if data will be simulated from the given SEM, * standardized, and estimated. */ public StandardizedSemIm(SemIm im, Initialization initialization) { this.semPm = new SemPm(im.getSemPm()); this.semGraph = new SemGraph(semPm.getGraph()); semGraph.setShowErrorTerms(true); if (semGraph.existsDirectedCycle()) { throw new IllegalArgumentException("The cyclic case is not handled."); } if (initialization == Initialization.CALCULATE_FROM_SEM) { // This code calculates the new coefficients directly from the old ones. edgeParameters = new HashMap<Edge, Double>(); List<Node> nodes = im.getVariableNodes(); TetradMatrix impliedCovar = im.getImplCovar(true); for (Parameter parameter : im.getSemPm().getParameters()) { if (parameter.getType() == ParamType.COEF) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); int aindex = nodes.indexOf(a); int bindex = nodes.indexOf(b); double vara = impliedCovar.get(aindex, aindex); double stda = Math.sqrt(vara); double varb = impliedCovar.get(bindex, bindex); double stdb = Math.sqrt(varb); double oldCoef = im.getEdgeCoef(a, b); double newCoef = (stda / stdb) * oldCoef; edgeParameters.put(Edges.directedEdge(a, b), newCoef); } else if (parameter.getType() == ParamType.COVAR) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); Node exoa = semGraph.getExogenous(a); Node exob = semGraph.getExogenous(b); double covar = im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b)); edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar); } } } else { // This code estimates the new coefficients from simulated data from the old model. DataSet dataSet = im.simulateData(1000, false); TetradMatrix _dataSet = dataSet.getDoubleData(); _dataSet = DataUtils.standardizeData(_dataSet); DataSet dataSetStandardized = ColtDataSet.makeData(dataSet.getVariables(), _dataSet); SemEstimator estimator = new SemEstimator(dataSetStandardized, im.getSemPm()); SemIm imStandardized = estimator.estimate(); edgeParameters = new HashMap<Edge, Double>(); for (Parameter parameter : imStandardized.getSemPm().getParameters()) { if (parameter.getType() == ParamType.COEF) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); double coef = imStandardized.getEdgeCoef(a, b); edgeParameters.put(Edges.directedEdge(a, b), coef); } else if (parameter.getType() == ParamType.COVAR) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); Node exoa = semGraph.getExogenous(a); Node exob = semGraph.getExogenous(b); double covar = -im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b)); edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar); } } } this.measuredNodes = Collections.unmodifiableList(semPm.getMeasuredNodes()); }
public DataSet simulateDataCholesky( int sampleSize, TetradMatrix covar, List<Node> variableNodes) { List<Node> variables = new LinkedList<Node>(); for (Node node : variableNodes) { variables.add(node); } List<Node> newVariables = new ArrayList<Node>(); for (Node node : variables) { ContinuousVariable continuousVariable = new ContinuousVariable(node.getName()); continuousVariable.setNodeType(node.getNodeType()); newVariables.add(continuousVariable); } TetradMatrix impliedCovar = covar; DataSet fullDataSet = new ColtDataSet(sampleSize, newVariables); TetradMatrix cholesky = MatrixUtils.choleskyC(impliedCovar); // Simulate the data by repeatedly calling the Cholesky.exogenousData // method. Store only the data for the measured variables. ROW: for (int row = 0; row < sampleSize; row++) { // Step 1. Generate normal samples. double exoData[] = new double[cholesky.rows()]; for (int i = 0; i < exoData.length; i++) { exoData[i] = RandomUtil.getInstance().nextNormal(0, 1); // exoData[i] = randomUtil.nextUniform(-1, 1); } // Step 2. Multiply by cholesky to get correct covariance. double point[] = new double[exoData.length]; for (int i = 0; i < exoData.length; i++) { double sum = 0.0; for (int j = 0; j <= i; j++) { sum += cholesky.get(i, j) * exoData[j]; } point[i] = sum; } double rowData[] = point; for (int col = 0; col < variables.size(); col++) { int index = variableNodes.indexOf(variables.get(col)); double value = rowData[index]; if (Double.isNaN(value) || Double.isInfinite(value)) { throw new IllegalArgumentException("Value out of range: " + value); } fullDataSet.setDouble(row, col, value); } } return DataUtils.restrictToMeasured(fullDataSet); }