Exemple #1
0
  /**
   * Calculates the sample likelihood and BIC score for i given its parents in a simple SEM model.
   */
  private double localSemScore(int i, int[] parents) {
    try {
      ICovarianceMatrix cov = getCovMatrix();
      double varianceY = cov.getValue(i, i);
      double residualVariance = varianceY;
      int n = sampleSize();
      int p = parents.length;
      int k = (p * (p + 1)) / 2 + p;
      //            int k = (p + 1) * (p + 1);
      //            int k = p + 1;
      TetradMatrix covxx = cov.getSelection(parents, parents);
      TetradMatrix covxxInv = covxx.inverse();
      TetradVector covxy = cov.getSelection(parents, new int[] {i}).getColumn(0);
      TetradVector b = covxxInv.times(covxy);
      residualVariance -= covxy.dotProduct(b);

      if (residualVariance <= 0 && verbose) {
        out.println(
            "Nonpositive residual varianceY: resVar / varianceY = "
                + (residualVariance / varianceY));
        return Double.NaN;
      }

      double c = getPenaltyDiscount();

      //            return -n * log(residualVariance) - 2 * k; //AIC
      return -n * Math.log(residualVariance) - c * k * Math.log(n);
      //            return -n * log(residualVariance) - c * k * (log(n) - log(2 * PI));
    } catch (Exception e) {
      e.printStackTrace();
      throw new RuntimeException(e);
      //            throwMinimalLinearDependentSet(parents, cov);
    }
  }
  /**
   * @return Returns the error covariance matrix of the model. i.e. [a][b] is the covariance of E_a
   *     and E_b, with [a][a] of course being the variance of E_a. THESE ARE NOT PARAMETERS OF THE
   *     MODEL; THEY ARE CALCULATED. Note that elements of this matrix may be Double.NaN; this
   *     indicates that these elements cannot be calculated.
   */
  private TetradMatrix errCovar(Map<Node, Double> errorVariances) {
    List<Node> variableNodes = getVariableNodes();
    List<Node> errorNodes = new ArrayList<Node>();

    for (Node node : variableNodes) {
      errorNodes.add(semGraph.getExogenous(node));
    }

    TetradMatrix errorCovar = new TetradMatrix(errorVariances.size(), errorVariances.size());

    for (int index = 0; index < errorNodes.size(); index++) {
      Node error = errorNodes.get(index);
      double variance = getErrorVariance(error);
      errorCovar.set(index, index, variance);
    }

    for (int index1 = 0; index1 < errorNodes.size(); index1++) {
      for (int index2 = 0; index2 < errorNodes.size(); index2++) {
        Node error1 = errorNodes.get(index1);
        Node error2 = errorNodes.get(index2);
        Edge edge = semGraph.getEdge(error1, error2);

        if (edge != null && Edges.isBidirectedEdge(edge)) {
          double covariance = getErrorCovariance(error1, error2);
          errorCovar.set(index1, index2, covariance);
        }
      }
    }

    return errorCovar;
  }
  /**
   * @return The edge coefficient matrix of the model, a la SemIm. Note that this will normally need
   *     to be transposed, since [a][b] is the edge coefficient for a-->b, not b-->a. Sorry.
   *     History. THESE ARE PARAMETERS OF THE MODEL--THE ONLY PARAMETERS.
   */
  public TetradMatrix edgeCoef() {
    List<Node> variableNodes = getVariableNodes();

    TetradMatrix edgeCoef = new TetradMatrix(variableNodes.size(), variableNodes.size());

    for (Edge edge : edgeParameters.keySet()) {
      if (Edges.isBidirectedEdge(edge)) {
        continue;
      }

      Node a = edge.getNode1();
      Node b = edge.getNode2();

      int aindex = variableNodes.indexOf(a);
      int bindex = variableNodes.indexOf(b);

      double coef = edgeParameters.get(edge);

      edgeCoef.set(aindex, bindex, coef);
    }

    return edgeCoef;
  }
  /**
   * Takes a Cholesky decomposition from the Cholesky.cholesky method and a set of data simulated
   * using the information in that matrix. Written by Don Crimbchin. Modified June 8, Matt
   * Easterday: added a random # seed so that data can be recalculated with the same result in
   * Causality lab
   *
   * @param cholesky the result from cholesky above.
   * @param randomUtil a random number generator, if null the method will make a new generator for
   *     each random number needed
   * @return an array the same length as the width or length (cholesky should have the same width
   *     and length) containing a randomly generate data set.
   */
  private double[] exogenousData(TetradMatrix cholesky, RandomUtil randomUtil) {

    // Step 1. Generate normal samples.
    double exoData[] = new double[cholesky.rows()];

    for (int i = 0; i < exoData.length; i++) {
      exoData[i] = randomUtil.nextNormal(0, 1);
    }

    // Step 2. Multiply by cholesky to get correct covariance.
    double point[] = new double[exoData.length];

    for (int i = 0; i < exoData.length; i++) {
      double sum = 0.0;

      for (int j = 0; j <= i; j++) {
        sum += cholesky.get(i, j) * exoData[j];
      }

      point[i] = sum;
    }

    return point;
  }
  /**
   * @param sampleSize The sample size of the desired data set.
   * @param latentDataSaved True if latent variables should be included in the data set.
   * @return This returns a standardized data set simulated from the model, using the reduced form
   *     method.
   */
  public DataSet simulateDataReducedForm(int sampleSize, boolean latentDataSaved) {
    int numVars = getVariableNodes().size();

    // Calculate inv(I - edgeCoef)
    TetradMatrix edgeCoef = edgeCoef().copy().transpose();

    //        TetradMatrix iMinusB = TetradAlgebra.identity(edgeCoef.rows());
    //        iMinusB.assign(edgeCoef, Functions.minus);

    TetradMatrix iMinusB = TetradAlgebra.identity(edgeCoef.rows()).minus(edgeCoef);

    TetradMatrix inv = iMinusB.inverse();

    // Pick error values e, for each calculate inv * e.
    TetradMatrix sim = new TetradMatrix(sampleSize, numVars);

    // Generate error data with the right variances and covariances, then override this
    // with error data for varaibles that have special distributions defined. Not ideal,
    // but not sure what else to do at the moment. It's better than not taking covariances
    // into account!
    TetradMatrix cholesky = MatrixUtils.choleskyC(errCovar(errorVariances()));

    for (int i = 0; i < sampleSize; i++) {
      TetradVector e = new TetradVector(exogenousData(cholesky, RandomUtil.getInstance()));
      TetradVector ePrime = inv.times(e);
      sim.assignRow(i, ePrime); // sim.viewRow(i).assign(ePrime);
    }

    DataSet fullDataSet = ColtDataSet.makeContinuousData(getVariableNodes(), sim);

    if (latentDataSaved) {
      return fullDataSet;
    } else {
      return DataUtils.restrictToMeasured(fullDataSet);
    }
  }
  /**
   * Constructs a new standardized SEM IM from the freeParameters in the given SEM IM.
   *
   * @param im Stop asking me for these things! The given SEM IM!!!
   * @param initialization CALCULATE_FROM_SEM if the initial values will be calculated from the
   *     given SEM IM; INITIALIZE_FROM_DATA if data will be simulated from the given SEM,
   *     standardized, and estimated.
   */
  public StandardizedSemIm(SemIm im, Initialization initialization) {
    this.semPm = new SemPm(im.getSemPm());
    this.semGraph = new SemGraph(semPm.getGraph());
    semGraph.setShowErrorTerms(true);

    if (semGraph.existsDirectedCycle()) {
      throw new IllegalArgumentException("The cyclic case is not handled.");
    }

    if (initialization == Initialization.CALCULATE_FROM_SEM) {
      //         This code calculates the new coefficients directly from the old ones.
      edgeParameters = new HashMap<Edge, Double>();

      List<Node> nodes = im.getVariableNodes();
      TetradMatrix impliedCovar = im.getImplCovar(true);

      for (Parameter parameter : im.getSemPm().getParameters()) {
        if (parameter.getType() == ParamType.COEF) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          int aindex = nodes.indexOf(a);
          int bindex = nodes.indexOf(b);
          double vara = impliedCovar.get(aindex, aindex);
          double stda = Math.sqrt(vara);
          double varb = impliedCovar.get(bindex, bindex);
          double stdb = Math.sqrt(varb);
          double oldCoef = im.getEdgeCoef(a, b);
          double newCoef = (stda / stdb) * oldCoef;
          edgeParameters.put(Edges.directedEdge(a, b), newCoef);
        } else if (parameter.getType() == ParamType.COVAR) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          Node exoa = semGraph.getExogenous(a);
          Node exob = semGraph.getExogenous(b);
          double covar = im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b));
          edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar);
        }
      }
    } else {

      // This code estimates the new coefficients from simulated data from the old model.
      DataSet dataSet = im.simulateData(1000, false);
      TetradMatrix _dataSet = dataSet.getDoubleData();
      _dataSet = DataUtils.standardizeData(_dataSet);
      DataSet dataSetStandardized = ColtDataSet.makeData(dataSet.getVariables(), _dataSet);

      SemEstimator estimator = new SemEstimator(dataSetStandardized, im.getSemPm());
      SemIm imStandardized = estimator.estimate();

      edgeParameters = new HashMap<Edge, Double>();

      for (Parameter parameter : imStandardized.getSemPm().getParameters()) {
        if (parameter.getType() == ParamType.COEF) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          double coef = imStandardized.getEdgeCoef(a, b);
          edgeParameters.put(Edges.directedEdge(a, b), coef);
        } else if (parameter.getType() == ParamType.COVAR) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          Node exoa = semGraph.getExogenous(a);
          Node exob = semGraph.getExogenous(b);
          double covar = -im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b));
          edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar);
        }
      }
    }

    this.measuredNodes = Collections.unmodifiableList(semPm.getMeasuredNodes());
  }