public boolean isIndependent(Node x, Node y, List<Node> z) {
    int[] all = new int[z.size() + 2];
    all[0] = variablesMap.get(x);
    all[1] = variablesMap.get(y);
    for (int i = 0; i < z.size(); i++) {
      all[i + 2] = variablesMap.get(z.get(i));
    }

    int sampleSize = data.get(0).rows();
    List<Double> pValues = new ArrayList<Double>();

    for (int m = 0; m < ncov.size(); m++) {
      TetradMatrix _ncov = ncov.get(m).getSelection(all, all);
      TetradMatrix inv = _ncov.inverse();
      double r = -inv.get(0, 1) / sqrt(inv.get(0, 0) * inv.get(1, 1));

      double fisherZ =
          sqrt(sampleSize - z.size() - 3.0) * 0.5 * (Math.log(1.0 + r) - Math.log(1.0 - r));
      double pValue;

      if (Double.isInfinite(fisherZ)) {
        pValue = 0;
      } else {
        pValue = 2.0 * (1.0 - RandomUtil.getInstance().normalCdf(0, 1, abs(fisherZ)));
      }

      pValues.add(pValue);
    }

    double _cutoff = alpha;

    if (fdr) {
      _cutoff = StatUtils.fdrCutoff(alpha, pValues, false);
    }

    Collections.sort(pValues);
    int index = (int) round((1.0 - percent) * pValues.size());
    this.pValue = pValues.get(index);

    //        if (this.pValue == 0) {
    //            System.out.println("Zero pvalue "+ SearchLogUtils.independenceFactMsg(x, y, z,
    // getPValue()));
    //        }

    boolean independent = this.pValue > _cutoff;

    if (verbose) {
      if (independent) {
        TetradLogger.getInstance()
            .log("independencies", SearchLogUtils.independenceFactMsg(x, y, z, getPValue()));
        //            System.out.println(SearchLogUtils.independenceFactMsg(x, y, z, getPValue()));
      } else {
        TetradLogger.getInstance()
            .log("dependencies", SearchLogUtils.dependenceFactMsg(x, y, z, getPValue()));
      }
    }

    return independent;
  }
예제 #2
0
  /**
   * Takes a Cholesky decomposition from the Cholesky.cholesky method and a set of data simulated
   * using the information in that matrix. Written by Don Crimbchin. Modified June 8, Matt
   * Easterday: added a random # seed so that data can be recalculated with the same result in
   * Causality lab
   *
   * @param cholesky the result from cholesky above.
   * @param randomUtil a random number generator, if null the method will make a new generator for
   *     each random number needed
   * @return an array the same length as the width or length (cholesky should have the same width
   *     and length) containing a randomly generate data set.
   */
  private double[] exogenousData(TetradMatrix cholesky, RandomUtil randomUtil) {

    // Step 1. Generate normal samples.
    double exoData[] = new double[cholesky.rows()];

    for (int i = 0; i < exoData.length; i++) {
      exoData[i] = randomUtil.nextNormal(0, 1);
    }

    // Step 2. Multiply by cholesky to get correct covariance.
    double point[] = new double[exoData.length];

    for (int i = 0; i < exoData.length; i++) {
      double sum = 0.0;

      for (int j = 0; j <= i; j++) {
        sum += cholesky.get(i, j) * exoData[j];
      }

      point[i] = sum;
    }

    return point;
  }
예제 #3
0
  /**
   * This method computes the information matrix or Hessian matrix of second order partial
   * derivatives of the fitting function (4B_2 on page 135 of Bollen) with respect to the free
   * freeParameters of the estimated SEM. It then computes the inverse of the the information matrix
   * and calculates the standard errors of the freeParameters as the square roots of the diagonal
   * elements of that matrix.
   *
   * @param estSem the estimated SEM.
   */
  public void computeStdErrors(ISemIm estSem) {
    //        if (!unmeasuredLatents(estSem.getSemPm()).isEmpty()) {
    //            int n = estSem.getFreeParameters().size();
    //            stdErrs = new double[n];
    //
    //            for (int i = 0; i < n; i++) {
    //                stdErrs[i] = Double.NaN;
    //            }
    //
    //            return;
    //        }

    //        this.semIm = estSem;
    estSem.setParameterBoundsEnforced(false);
    double[] paramsOriginal = estSem.getFreeParamValues();
    double delta;
    FittingFunction fcn = new SemFittingFunction(estSem);
    boolean ridder = false; // Ridder is more accurate but a lot slower.

    int n = fcn.getNumParameters();

    // Store the free freeParameters of the SemIm so that they can be reset to these
    // values.  The differentiation methods change them.
    double[] params = new double[n];
    System.arraycopy(paramsOriginal, 0, params, 0, n);

    // If the Ridder method (secondPartialDerivativeRidr) is used to search for
    // the best delta it is initially set to 0.1.  Otherwise the delta is set to
    // 0.005.  That value has worked well for those fitting functions tested to
    // date.
    if (ridder) {
      delta = 0.1;
    } else {
      delta = 0.005;
    }

    // The Hessian matrix of second order partial derivatives is called the
    // information matrix.
    TetradMatrix hess = new TetradMatrix(n, n);

    List<Parameter> freeParameters = estSem.getFreeParameters();
    boolean containsCovararianceParameter = false;

    for (Parameter p : freeParameters) {
      if (p.getType() == ParamType.COVAR) {
        containsCovararianceParameter = true;
        break;
      }
    }

    for (int i = 0; i < n; i++) {
      for (int j = i; j < n; j++) {
        Parameter pi = freeParameters.get(i);
        Parameter pj = freeParameters.get(j);

        if (!containsCovararianceParameter) {

          // Restrict off-diagonal to just collider edge freeParameters.
          if (i != j && (pi.getType() != ParamType.COEF || pj.getType() != ParamType.COEF)) {
            continue;
          }

          if (pi.getNodeB() != pj.getNodeB()) {
            continue;
          }
        }

        double v;

        if (ridder) {
          v = secondPartialDerivativeRidr(fcn, i, j, params, delta);
        } else {
          v = secondPartialDerivative(fcn, i, j, params, delta);
        }

        if (Math.abs(v) < 1e-7) {
          v = 0;
        }

        //                if (Double.isNaN(v)) {
        //                    v = 0;
        //                }

        hess.set(i, j, v);
        hess.set(j, i, v);
      }
    }

    ROWS:
    for (int i = 0; i < hess.rows(); i++) {
      for (int j = 0; j < hess.columns(); j++) {
        if (hess.get(i, j) != 0) {
          continue ROWS;
        }
      }

      //            System.out.println("Zero row for " + freeParameters.get(i));
    }

    // The diagonal elements of the inverse of the information matrix are the
    // squares of the standard errors of the freeParameters.  Their order is the
    // same as in the array of free parameter values stored in paramsOriginal.
    try {

      TetradMatrix hessInv = hess.inverse();
      //            TetradMatrix hessInv = hess.ginverse();

      //            System.out.println("Inverse: " + hessInv);

      //            for (int i = 0; i < freeParameters.size(); i++) {
      //                System.out.println(i + " = " + freeParameters.get(i));
      //            }

      stdErrs = new double[n];

      // Hence the standard errors of the freeParameters are the square roots of the
      // diagonal elements of the inverse of the information matrix.
      for (int i = 0; i < n; i++) {
        double v = Math.sqrt((2.0 / (estSem.getSampleSize() - 1)) * hessInv.get(i, i));

        if (v == 0) {
          System.out.println("v = " + v + " hessInv(i, i) = " + hessInv.get(i, i));
        }

        if (v == 0) {
          stdErrs[i] = Double.NaN;
        } else {
          stdErrs[i] = v;
        }
      }
    } catch (Exception e) {
      e.printStackTrace();

      stdErrs = new double[n];

      for (int i = 0; i < n; i++) {
        stdErrs[i] = Double.NaN;
      }
    }

    // Restore the freeParameters of the estimated SEM to their original values.
    estSem.setFreeParamValues(paramsOriginal);
    estSem.setParameterBoundsEnforced(true);
  }
예제 #4
0
  /**
   * Constructs a new standardized SEM IM from the freeParameters in the given SEM IM.
   *
   * @param im Stop asking me for these things! The given SEM IM!!!
   * @param initialization CALCULATE_FROM_SEM if the initial values will be calculated from the
   *     given SEM IM; INITIALIZE_FROM_DATA if data will be simulated from the given SEM,
   *     standardized, and estimated.
   */
  public StandardizedSemIm(SemIm im, Initialization initialization) {
    this.semPm = new SemPm(im.getSemPm());
    this.semGraph = new SemGraph(semPm.getGraph());
    semGraph.setShowErrorTerms(true);

    if (semGraph.existsDirectedCycle()) {
      throw new IllegalArgumentException("The cyclic case is not handled.");
    }

    if (initialization == Initialization.CALCULATE_FROM_SEM) {
      //         This code calculates the new coefficients directly from the old ones.
      edgeParameters = new HashMap<Edge, Double>();

      List<Node> nodes = im.getVariableNodes();
      TetradMatrix impliedCovar = im.getImplCovar(true);

      for (Parameter parameter : im.getSemPm().getParameters()) {
        if (parameter.getType() == ParamType.COEF) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          int aindex = nodes.indexOf(a);
          int bindex = nodes.indexOf(b);
          double vara = impliedCovar.get(aindex, aindex);
          double stda = Math.sqrt(vara);
          double varb = impliedCovar.get(bindex, bindex);
          double stdb = Math.sqrt(varb);
          double oldCoef = im.getEdgeCoef(a, b);
          double newCoef = (stda / stdb) * oldCoef;
          edgeParameters.put(Edges.directedEdge(a, b), newCoef);
        } else if (parameter.getType() == ParamType.COVAR) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          Node exoa = semGraph.getExogenous(a);
          Node exob = semGraph.getExogenous(b);
          double covar = im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b));
          edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar);
        }
      }
    } else {

      // This code estimates the new coefficients from simulated data from the old model.
      DataSet dataSet = im.simulateData(1000, false);
      TetradMatrix _dataSet = dataSet.getDoubleData();
      _dataSet = DataUtils.standardizeData(_dataSet);
      DataSet dataSetStandardized = ColtDataSet.makeData(dataSet.getVariables(), _dataSet);

      SemEstimator estimator = new SemEstimator(dataSetStandardized, im.getSemPm());
      SemIm imStandardized = estimator.estimate();

      edgeParameters = new HashMap<Edge, Double>();

      for (Parameter parameter : imStandardized.getSemPm().getParameters()) {
        if (parameter.getType() == ParamType.COEF) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          double coef = imStandardized.getEdgeCoef(a, b);
          edgeParameters.put(Edges.directedEdge(a, b), coef);
        } else if (parameter.getType() == ParamType.COVAR) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          Node exoa = semGraph.getExogenous(a);
          Node exob = semGraph.getExogenous(b);
          double covar = -im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b));
          edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar);
        }
      }
    }

    this.measuredNodes = Collections.unmodifiableList(semPm.getMeasuredNodes());
  }
예제 #5
0
  public DataSet simulateDataCholesky(
      int sampleSize, TetradMatrix covar, List<Node> variableNodes) {
    List<Node> variables = new LinkedList<Node>();

    for (Node node : variableNodes) {
      variables.add(node);
    }

    List<Node> newVariables = new ArrayList<Node>();

    for (Node node : variables) {
      ContinuousVariable continuousVariable = new ContinuousVariable(node.getName());
      continuousVariable.setNodeType(node.getNodeType());
      newVariables.add(continuousVariable);
    }

    TetradMatrix impliedCovar = covar;

    DataSet fullDataSet = new ColtDataSet(sampleSize, newVariables);
    TetradMatrix cholesky = MatrixUtils.choleskyC(impliedCovar);

    // Simulate the data by repeatedly calling the Cholesky.exogenousData
    // method. Store only the data for the measured variables.
    ROW:
    for (int row = 0; row < sampleSize; row++) {

      // Step 1. Generate normal samples.
      double exoData[] = new double[cholesky.rows()];

      for (int i = 0; i < exoData.length; i++) {
        exoData[i] = RandomUtil.getInstance().nextNormal(0, 1);
        //            exoData[i] = randomUtil.nextUniform(-1, 1);
      }

      // Step 2. Multiply by cholesky to get correct covariance.
      double point[] = new double[exoData.length];

      for (int i = 0; i < exoData.length; i++) {
        double sum = 0.0;

        for (int j = 0; j <= i; j++) {
          sum += cholesky.get(i, j) * exoData[j];
        }

        point[i] = sum;
      }

      double rowData[] = point;

      for (int col = 0; col < variables.size(); col++) {
        int index = variableNodes.indexOf(variables.get(col));
        double value = rowData[index];

        if (Double.isNaN(value) || Double.isInfinite(value)) {
          throw new IllegalArgumentException("Value out of range: " + value);
        }

        fullDataSet.setDouble(row, col, value);
      }
    }

    return DataUtils.restrictToMeasured(fullDataSet);
  }