@Test
  public void test2() {
    RandomUtil.getInstance().setSeed(2999983L);

    int sampleSize = 1000;

    List<Node> variableNodes = new ArrayList<>();
    ContinuousVariable x1 = new ContinuousVariable("X1");
    ContinuousVariable x2 = new ContinuousVariable("X2");
    ContinuousVariable x3 = new ContinuousVariable("X3");
    ContinuousVariable x4 = new ContinuousVariable("X4");
    ContinuousVariable x5 = new ContinuousVariable("X5");

    variableNodes.add(x1);
    variableNodes.add(x2);
    variableNodes.add(x3);
    variableNodes.add(x4);
    variableNodes.add(x5);

    Graph _graph = new EdgeListGraph(variableNodes);
    SemGraph graph = new SemGraph(_graph);
    graph.addDirectedEdge(x1, x3);
    graph.addDirectedEdge(x2, x3);
    graph.addDirectedEdge(x3, x4);
    graph.addDirectedEdge(x2, x4);
    graph.addDirectedEdge(x4, x5);
    graph.addDirectedEdge(x2, x5);

    SemPm semPm = new SemPm(graph);
    SemIm semIm = new SemIm(semPm);
    DataSet dataSet = semIm.simulateData(sampleSize, false);

    print(semPm);

    GeneralizedSemPm _semPm = new GeneralizedSemPm(semPm);
    GeneralizedSemIm _semIm = new GeneralizedSemIm(_semPm, semIm);
    DataSet _dataSet = _semIm.simulateDataMinimizeSurface(sampleSize, false);

    print(_semPm);

    //        System.out.println(_dataSet);

    for (int j = 0; j < dataSet.getNumColumns(); j++) {
      double[] col = dataSet.getDoubleData().getColumn(j).toArray();
      double[] _col = _dataSet.getDoubleData().getColumn(j).toArray();

      double mean = StatUtils.mean(col);
      double _mean = StatUtils.mean(_col);

      double variance = StatUtils.variance(col);
      double _variance = StatUtils.variance(_col);

      assertEquals(mean, _mean, 0.3);
      assertEquals(1.0, variance / _variance, .2);
    }
  }
Example #2
0
  /**
   * Constructs a new standardized SEM IM from the freeParameters in the given SEM IM.
   *
   * @param im Stop asking me for these things! The given SEM IM!!!
   * @param initialization CALCULATE_FROM_SEM if the initial values will be calculated from the
   *     given SEM IM; INITIALIZE_FROM_DATA if data will be simulated from the given SEM,
   *     standardized, and estimated.
   */
  public StandardizedSemIm(SemIm im, Initialization initialization) {
    this.semPm = new SemPm(im.getSemPm());
    this.semGraph = new SemGraph(semPm.getGraph());
    semGraph.setShowErrorTerms(true);

    if (semGraph.existsDirectedCycle()) {
      throw new IllegalArgumentException("The cyclic case is not handled.");
    }

    if (initialization == Initialization.CALCULATE_FROM_SEM) {
      //         This code calculates the new coefficients directly from the old ones.
      edgeParameters = new HashMap<Edge, Double>();

      List<Node> nodes = im.getVariableNodes();
      TetradMatrix impliedCovar = im.getImplCovar(true);

      for (Parameter parameter : im.getSemPm().getParameters()) {
        if (parameter.getType() == ParamType.COEF) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          int aindex = nodes.indexOf(a);
          int bindex = nodes.indexOf(b);
          double vara = impliedCovar.get(aindex, aindex);
          double stda = Math.sqrt(vara);
          double varb = impliedCovar.get(bindex, bindex);
          double stdb = Math.sqrt(varb);
          double oldCoef = im.getEdgeCoef(a, b);
          double newCoef = (stda / stdb) * oldCoef;
          edgeParameters.put(Edges.directedEdge(a, b), newCoef);
        } else if (parameter.getType() == ParamType.COVAR) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          Node exoa = semGraph.getExogenous(a);
          Node exob = semGraph.getExogenous(b);
          double covar = im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b));
          edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar);
        }
      }
    } else {

      // This code estimates the new coefficients from simulated data from the old model.
      DataSet dataSet = im.simulateData(1000, false);
      TetradMatrix _dataSet = dataSet.getDoubleData();
      _dataSet = DataUtils.standardizeData(_dataSet);
      DataSet dataSetStandardized = ColtDataSet.makeData(dataSet.getVariables(), _dataSet);

      SemEstimator estimator = new SemEstimator(dataSetStandardized, im.getSemPm());
      SemIm imStandardized = estimator.estimate();

      edgeParameters = new HashMap<Edge, Double>();

      for (Parameter parameter : imStandardized.getSemPm().getParameters()) {
        if (parameter.getType() == ParamType.COEF) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          double coef = imStandardized.getEdgeCoef(a, b);
          edgeParameters.put(Edges.directedEdge(a, b), coef);
        } else if (parameter.getType() == ParamType.COVAR) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          Node exoa = semGraph.getExogenous(a);
          Node exob = semGraph.getExogenous(b);
          double covar = -im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b));
          edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar);
        }
      }
    }

    this.measuredNodes = Collections.unmodifiableList(semPm.getMeasuredNodes());
  }