示例#1
0
  /**
   * Simulates a data set from this model using the reduced form: every simulated case is obtained
   * by drawing a vector of error terms {@code e} and computing {@code inv(I - B) * e}, where
   * {@code B} is the transpose of a copy of the matrix returned by {@code edgeCoef()}.
   *
   * <p>The error terms are drawn by {@code exogenousData} from the Cholesky factor of {@code
   * errCovar(errorVariances())}, so that covariances among the errors are taken into account.
   *
   * @param sampleSize The number of rows (cases) to simulate.
   * @param latentDataSaved True if the full simulated data set should be returned as built; false
   *     if it should first be passed through {@code DataUtils.restrictToMeasured}.
   * @return The simulated continuous data set, with one column per variable node before any
   *     restriction to measured variables.
   */
  public DataSet simulateDataReducedForm(int sampleSize, boolean latentDataSaved) {
    int columnCount = getVariableNodes().size();

    // Reduced-form multiplier: inv(I - B).
    TetradMatrix coefficients = edgeCoef().copy().transpose();
    TetradMatrix reducedForm =
        TetradAlgebra.identity(coefficients.rows()).minus(coefficients).inverse();

    // One row per simulated case, one column per variable.
    TetradMatrix simulated = new TetradMatrix(sampleSize, columnCount);

    // Factor the error covariance matrix once; every draw below reuses it.
    TetradMatrix errorFactor = MatrixUtils.choleskyC(errCovar(errorVariances()));

    for (int row = 0; row < sampleSize; row++) {
      double[] errorDraw = exogenousData(errorFactor, RandomUtil.getInstance());
      TetradVector simulatedCase = reducedForm.times(new TetradVector(errorDraw));
      simulated.assignRow(row, simulatedCase);
    }

    DataSet fullDataSet = ColtDataSet.makeContinuousData(getVariableNodes(), simulated);

    return latentDataSaved ? fullDataSet : DataUtils.restrictToMeasured(fullDataSet);
  }
示例#2
0
  /**
   * Draws one vector of exogenous (error) values whose covariance structure is given by a Cholesky
   * factor: a vector of independent normal(0, 1) samples is generated and multiplied by the lower
   * triangle of {@code cholesky}. Written by Don Crimbchin. Modified June 8, Matt Easterday: added
   * a random # seed so that data can be recalculated with the same result in Causality lab.
   *
   * @param cholesky the Cholesky factor to multiply by; only entries on or below the diagonal are
   *     read, and its row count determines the length of the result.
   * @param randomUtil the random number generator to draw from; if null, the generator returned by
   *     {@code RandomUtil.getInstance()} is used instead.
   * @return an array with one entry per row of {@code cholesky} containing the generated point.
   */
  private double[] exogenousData(TetradMatrix cholesky, RandomUtil randomUtil) {
    // Honor the documented contract for a null generator instead of failing with an NPE.
    RandomUtil generator = (randomUtil != null) ? randomUtil : RandomUtil.getInstance();
    int dimension = cholesky.rows();

    // Step 1. Generate independent normal(0, 1) samples.
    double[] normals = new double[dimension];

    for (int i = 0; i < dimension; i++) {
      normals[i] = generator.nextNormal(0, 1);
    }

    // Step 2. Multiply by the lower triangle of cholesky to get the correct covariance.
    double[] point = new double[dimension];

    for (int i = 0; i < dimension; i++) {
      double sum = 0.0;

      for (int j = 0; j <= i; j++) {
        sum += cholesky.get(i, j) * normals[j];
      }

      point[i] = sum;
    }

    return point;
  }
  /**
   * Builds the test from a list of data sets. The variables are taken from the first data set.
   * Each data set is passed through {@code DataUtils.center}, and for each resulting matrix {@code
   * d} the matrix {@code d' * d / rows(d)} is stored and handed to a {@code
   * RecursivePartialCorrelation}.
   *
   * @param dataSets the data sets to test over; the first element supplies the variables and the
   *     number of rows, so the list must contain at least one data set.
   * @param alpha the value passed to {@code setAlpha} (presumably the significance level —
   *     confirm against {@code setAlpha}).
   */
  public IndTestFisherZPercentIndependent(List<DataSet> dataSets, double alpha) {
    this.dataSets = dataSets;
    this.variables = dataSets.get(0).getVariables();

    // Centered values of every data set, in the order the data sets were given.
    this.data = new ArrayList<TetradMatrix>();

    for (DataSet original : dataSets) {
      DataSet centered = DataUtils.center(original);
      this.data.add(centered.getDoubleData());
    }

    // Cross-product matrix of each centered data matrix, divided by its row count.
    this.ncov = new ArrayList<TetradMatrix>();

    for (TetradMatrix centeredValues : this.data) {
      TetradMatrix crossProducts = centeredValues.transpose().times(centeredValues);
      this.ncov.add(crossProducts.scalarMult(1.0 / centeredValues.rows()));
    }

    setAlpha(alpha);

    // Row indices 0..numRows-1, sized from the first data set.
    this.rows = new int[dataSets.get(0).getNumRows()];

    for (int r = 0; r < getRows().length; r++) {
      getRows()[r] = r;
    }

    // Position of each variable in the variable list.
    this.variablesMap = new HashMap<Node, Integer>();
    int position = 0;

    for (Node variable : this.variables) {
      this.variablesMap.put(variable, position);
      position++;
    }

    this.recursivePartialCorrelation = new ArrayList<RecursivePartialCorrelation>();

    for (TetradMatrix matrix : this.ncov) {
      RecursivePartialCorrelation correlation =
          new RecursivePartialCorrelation(getVariables(), matrix);
      this.recursivePartialCorrelation.add(correlation);
    }
  }
  /**
   * Computes the information matrix (the Hessian matrix of second order partial derivatives of
   * the fitting function, 4B_2 on page 135 of Bollen) with respect to the free parameters of the
   * estimated SEM, inverts it, and stores the standard errors of the free parameters in {@code
   * stdErrs}. Each standard error is the square root of {@code 2 / (sampleSize - 1)} times the
   * corresponding diagonal element of the inverse.
   *
   * <p>If the inversion or the extraction of the diagonal throws, every standard error is set to
   * NaN. A computed standard error of exactly zero is also reported as NaN.
   *
   * <p>Parameter bounds enforcement is switched off while the derivatives are computed. The free
   * parameter values read at entry are written back, and bounds enforcement is switched on again,
   * before this method returns, whether it completes normally or throws.
   *
   * @param estSem the estimated SEM.
   */
  public void computeStdErrors(ISemIm estSem) {
    estSem.setParameterBoundsEnforced(false);
    double[] paramsOriginal = estSem.getFreeParamValues();

    try {
      FittingFunction fcn = new SemFittingFunction(estSem);
      boolean ridder = false; // Ridder is more accurate but a lot slower.

      int n = fcn.getNumParameters();

      // Work on a copy of the free parameter values; the differentiation methods change them.
      double[] params = new double[n];
      System.arraycopy(paramsOriginal, 0, params, 0, n);

      // If the Ridder method (secondPartialDerivativeRidr) is used to search for
      // the best delta it is initially set to 0.1.  Otherwise the delta is set to
      // 0.005.  That value has worked well for those fitting functions tested to
      // date.
      double delta = ridder ? 0.1 : 0.005;

      // The Hessian matrix of second order partial derivatives is called the
      // information matrix.
      TetradMatrix hess = new TetradMatrix(n, n);

      List<Parameter> freeParameters = estSem.getFreeParameters();
      boolean containsCovarianceParameter = false;

      for (Parameter p : freeParameters) {
        if (p.getType() == ParamType.COVAR) {
          containsCovarianceParameter = true;
          break;
        }
      }

      // Only the upper triangle is differentiated; each value is mirrored into the lower one.
      for (int i = 0; i < n; i++) {
        Parameter pi = freeParameters.get(i);

        for (int j = i; j < n; j++) {
          Parameter pj = freeParameters.get(j);

          if (!containsCovarianceParameter) {

            // Restrict off-diagonal to just collider edge freeParameters.
            if (i != j && (pi.getType() != ParamType.COEF || pj.getType() != ParamType.COEF)) {
              continue;
            }

            if (pi.getNodeB() != pj.getNodeB()) {
              continue;
            }
          }

          double v;

          if (ridder) {
            v = secondPartialDerivativeRidr(fcn, i, j, params, delta);
          } else {
            v = secondPartialDerivative(fcn, i, j, params, delta);
          }

          // Treat numerically negligible second derivatives as exactly zero.
          if (Math.abs(v) < 1e-7) {
            v = 0;
          }

          hess.set(i, j, v);
          hess.set(j, i, v);
        }
      }

      // The diagonal elements of the inverse of the information matrix are the
      // squares of the standard errors of the freeParameters.  Their order is the
      // same as in the array of free parameter values stored in paramsOriginal.
      try {
        TetradMatrix hessInv = hess.inverse();

        stdErrs = new double[n];

        for (int i = 0; i < n; i++) {
          double v = Math.sqrt((2.0 / (estSem.getSampleSize() - 1)) * hessInv.get(i, i));

          if (v == 0) {
            System.out.println("v = " + v + " hessInv(i, i) = " + hessInv.get(i, i));
            stdErrs[i] = Double.NaN;
          } else {
            stdErrs[i] = v;
          }
        }
      } catch (Exception e) {
        // Best effort: report the failure and mark every standard error as unavailable.
        e.printStackTrace();

        stdErrs = new double[n];

        for (int i = 0; i < n; i++) {
          stdErrs[i] = Double.NaN;
        }
      }
    } finally {
      // Restore the freeParameters of the estimated SEM to their original values, even if the
      // differentiation above failed.
      estSem.setFreeParamValues(paramsOriginal);
      estSem.setParameterBoundsEnforced(true);
    }
  }
  /**
   * Simulates a data set by drawing, for each row, a vector of independent normal(0, 1) samples
   * and multiplying it by the lower triangle of the Cholesky factor of {@code covar}.
   *
   * <p>A fresh {@code ContinuousVariable} carrying the same name and node type is created for
   * every node in {@code variableNodes}. All columns are filled in first; the result is then
   * passed through {@code DataUtils.restrictToMeasured}.
   *
   * @param sampleSize the number of rows to simulate.
   * @param covar the covariance matrix to factor with {@code MatrixUtils.choleskyC}.
   * @param variableNodes the nodes corresponding to the rows/columns of {@code covar}; column
   *     {@code c} of the data set reads the simulated value at the index of the first element of
   *     this list that equals the node at position {@code c}.
   * @return the simulated data set after {@code DataUtils.restrictToMeasured}.
   * @throws IllegalArgumentException if a simulated value is NaN or infinite.
   */
  public DataSet simulateDataCholesky(
      int sampleSize, TetradMatrix covar, List<Node> variableNodes) {
    int numColumns = variableNodes.size();
    List<Node> newVariables = new ArrayList<Node>(numColumns);

    // Index into the simulated point for each column. Resolved once here rather than once per
    // cell, since it does not depend on the row.
    int[] sourceIndex = new int[numColumns];
    int column = 0;

    for (Node node : variableNodes) {
      ContinuousVariable continuousVariable = new ContinuousVariable(node.getName());
      continuousVariable.setNodeType(node.getNodeType());
      newVariables.add(continuousVariable);

      sourceIndex[column] = variableNodes.indexOf(node);
      column++;
    }

    DataSet fullDataSet = new ColtDataSet(sampleSize, newVariables);
    TetradMatrix cholesky = MatrixUtils.choleskyC(covar);

    int dimension = cholesky.rows();
    RandomUtil random = RandomUtil.getInstance();

    // Both buffers are completely overwritten for every row, so they can be shared across rows.
    double[] exoData = new double[dimension];
    double[] point = new double[dimension];

    for (int row = 0; row < sampleSize; row++) {

      // Step 1. Generate normal samples.
      for (int i = 0; i < dimension; i++) {
        exoData[i] = random.nextNormal(0, 1);
      }

      // Step 2. Multiply by cholesky to get correct covariance.
      for (int i = 0; i < dimension; i++) {
        double sum = 0.0;

        for (int j = 0; j <= i; j++) {
          sum += cholesky.get(i, j) * exoData[j];
        }

        point[i] = sum;
      }

      // Step 3. Copy the simulated point into the data set, rejecting unusable values.
      for (int col = 0; col < numColumns; col++) {
        double value = point[sourceIndex[col]];

        if (Double.isNaN(value) || Double.isInfinite(value)) {
          throw new IllegalArgumentException("Value out of range: " + value);
        }

        fullDataSet.setDouble(row, col, value);
      }
    }

    return DataUtils.restrictToMeasured(fullDataSet);
  }