Esempio n. 1
0
  // Causes a package cycle.
  public void testManualDiscretize2() {
    Graph graph = new Dag(GraphUtils.randomGraph(5, 0, 5, 3, 3, 3, false));
    SemPm pm = new SemPm(graph);
    SemIm im = new SemIm(pm);
    DataSet data = im.simulateData(100, false);

    List<Node> nodes = data.getVariables();

    Discretizer discretizer = new Discretizer(data);
    //        discretizer.setVariablesCopied(true);

    discretizer.equalCounts(nodes.get(0), 3);
    discretizer.equalIntervals(nodes.get(1), 2);
    discretizer.equalCounts(nodes.get(2), 5);
    discretizer.equalIntervals(nodes.get(3), 8);
    discretizer.equalCounts(nodes.get(4), 4);

    DataSet discretized = discretizer.discretize();

    System.out.println(discretized);

    assertEquals(2, maxInColumn(discretized, 0));
    assertEquals(1, maxInColumn(discretized, 1));
    assertEquals(4, maxInColumn(discretized, 2));
    assertEquals(7, maxInColumn(discretized, 3));
    assertEquals(3, maxInColumn(discretized, 4));
  }
Esempio n. 2
0
  @Test
  public void test2() {
    RandomUtil.getInstance().setSeed(2999983L);

    int sampleSize = 1000;

    List<Node> variableNodes = new ArrayList<>();
    ContinuousVariable x1 = new ContinuousVariable("X1");
    ContinuousVariable x2 = new ContinuousVariable("X2");
    ContinuousVariable x3 = new ContinuousVariable("X3");
    ContinuousVariable x4 = new ContinuousVariable("X4");
    ContinuousVariable x5 = new ContinuousVariable("X5");

    variableNodes.add(x1);
    variableNodes.add(x2);
    variableNodes.add(x3);
    variableNodes.add(x4);
    variableNodes.add(x5);

    Graph _graph = new EdgeListGraph(variableNodes);
    SemGraph graph = new SemGraph(_graph);
    graph.addDirectedEdge(x1, x3);
    graph.addDirectedEdge(x2, x3);
    graph.addDirectedEdge(x3, x4);
    graph.addDirectedEdge(x2, x4);
    graph.addDirectedEdge(x4, x5);
    graph.addDirectedEdge(x2, x5);

    SemPm semPm = new SemPm(graph);
    SemIm semIm = new SemIm(semPm);
    DataSet dataSet = semIm.simulateData(sampleSize, false);

    print(semPm);

    GeneralizedSemPm _semPm = new GeneralizedSemPm(semPm);
    GeneralizedSemIm _semIm = new GeneralizedSemIm(_semPm, semIm);
    DataSet _dataSet = _semIm.simulateDataMinimizeSurface(sampleSize, false);

    print(_semPm);

    //        System.out.println(_dataSet);

    for (int j = 0; j < dataSet.getNumColumns(); j++) {
      double[] col = dataSet.getDoubleData().getColumn(j).toArray();
      double[] _col = _dataSet.getDoubleData().getColumn(j).toArray();

      double mean = StatUtils.mean(col);
      double _mean = StatUtils.mean(_col);

      double variance = StatUtils.variance(col);
      double _variance = StatUtils.variance(_col);

      assertEquals(mean, _mean, 0.3);
      assertEquals(1.0, variance / _variance, .2);
    }
  }
Esempio n. 3
0
  @Test
  public void test8() {
    RandomUtil.getInstance().setSeed(29999483L);

    Node x = new GraphNode("X");
    Node y = new GraphNode("Y");

    List<Node> nodes = new ArrayList<>();
    nodes.add(x);
    nodes.add(y);

    Graph graph = new EdgeListGraphSingleConnections(nodes);

    graph.addDirectedEdge(x, y);

    SemPm spm = new SemPm(graph);
    SemIm sim = new SemIm(spm);

    sim.setEdgeCoef(x, y, 20);
    sim.setErrVar(x, 1);
    sim.setErrVar(y, 1);

    GeneralizedSemPm pm = new GeneralizedSemPm(spm);
    GeneralizedSemIm im = new GeneralizedSemIm(pm, sim);

    print(im);

    try {
      pm.setParameterEstimationInitializationExpression("b1", "U(10, 30)");
      pm.setParameterEstimationInitializationExpression("T1", "U(.1, 3)");
      pm.setParameterEstimationInitializationExpression("T2", "U(.1, 3)");
    } catch (ParseException e) {
      e.printStackTrace();
    }

    DataSet data = im.simulateDataRecursive(1000, false);

    GeneralizedSemEstimator estimator = new GeneralizedSemEstimator();
    GeneralizedSemIm estIm = estimator.estimate(pm, data);

    print(estIm);
    //        System.out.println(estimator.getReport());

    double aSquaredStar = estimator.getaSquaredStar();

    assertEquals(0.69, aSquaredStar, 0.01);
  }
  private double paramValue(SemIm im, Parameter parameter) {
    double paramValue = im.getParamValue(parameter);

    if (parameter.getType() == ParamType.VAR) {
      paramValue = Math.sqrt(paramValue);
    }

    return paramValue;
  }
  public ImpliedCovarianceDataAllWrapper(SemEstimatorWrapper wrapper) {
    //        int sampleSize = params.getSampleSize();
    //        boolean latentDataSaved = params.isIncludeLatents();
    SemEstimator semEstimator = wrapper.getSemEstimator();
    SemIm semIm1 = semEstimator.getEstimatedSem();

    if (semIm1 != null) {

      TetradMatrix matrix2D = semIm1.getImplCovar(true);
      int sampleSize = semIm1.getSampleSize();
      List<Node> variables =
          wrapper.getSemEstimator().getEstimatedSem().getSemPm().getVariableNodes();
      CovarianceMatrix cov = new CovarianceMatrix(variables, matrix2D, sampleSize);
      setDataModel(cov);
      setSourceGraph(wrapper.getSemEstimator().getEstimatedSem().getSemPm().getGraph());
      this.semIm = wrapper.getEstimatedSemIm();
    }

    LogDataUtils.logDataModelList(
        "Data simulated from a linear structural equation model.", getDataModelList());
  }
  private double getClusterP2(List<Node> c) {
    Graph g = new EdgeListGraph(c);
    Node l = new GraphNode("L");
    l.setNodeType(NodeType.LATENT);
    g.addNode(l);

    for (Node n : c) {
      g.addDirectedEdge(l, n);
    }

    SemPm pm = new SemPm(g);
    SemEstimator est;
    if (dataModel instanceof DataSet) {
      est = new SemEstimator((DataSet) dataModel, pm, new SemOptimizerEm());
    } else {
      est = new SemEstimator((CovarianceMatrix) dataModel, pm, new SemOptimizerEm());
    }
    SemIm estIm = est.estimate();
    double pValue = estIm.getPValue();
    return pValue == 1 ? Double.NaN : pValue;
  }
Esempio n. 7
0
  public void rtestDSeparation4() {
    Graph graph = new Dag(GraphUtils.randomGraph(100, 20, 100, 5, 5, 5, false));

    long start, stop;
    int depth = -1;

    IndependenceTest test = new IndTestDSep(graph);

    Rfci fci = new Rfci(test);
    Fas fas = new Fas(test);
    start = System.currentTimeMillis();
    fci.setDepth(depth);
    fci.setVerbose(true);
    fci.search(fas, fas.getNodes());
    stop = System.currentTimeMillis();

    System.out.println("DSEP RFCI");
    System.out.println("# dsep checks = " + fas.getNumIndependenceTests());
    System.out.println("Elapsed " + (stop - start));
    System.out.println("Per " + fas.getNumIndependenceTests() / (double) (stop - start));

    SemPm pm = new SemPm(graph);
    SemIm im = new SemIm(pm);
    DataSet data = im.simulateData(1000, false);
    IndependenceTest test2 = new IndTestFisherZ(data, 0.001);

    Rfci fci3 = new Rfci(test2);
    Fas fas2 = new Fas(test2);
    start = System.currentTimeMillis();
    fci3.setDepth(depth);
    fci3.search(fas2, fas2.getNodes());
    stop = System.currentTimeMillis();

    System.out.println("FISHER Z RFCI");
    System.out.println("# indep checks = " + fas.getNumIndependenceTests());
    System.out.println("Elapsed " + (stop - start));
    System.out.println("Per " + fas.getNumIndependenceTests() / (double) (stop - start));
  }
Esempio n. 8
0
  public void testManualDiscretize3() {
    Graph graph = new Dag(GraphUtils.randomGraph(5, 0, 5, 3, 3, 3, false));
    SemPm pm = new SemPm(graph);
    SemIm im = new SemIm(pm);
    DataSet data = im.simulateData(100, false);

    List<Node> nodes = data.getVariables();

    Discretizer discretizer = new Discretizer(data);
    discretizer.setVariablesCopied(true);

    discretizer.setVariablesCopied(true);
    discretizer.equalCounts(nodes.get(0), 3);

    DataSet discretized = discretizer.discretize();

    System.out.println(discretized);

    assertTrue(discretized.getVariable(0) instanceof DiscreteVariable);
    assertTrue(discretized.getVariable(1) instanceof ContinuousVariable);
    assertTrue(discretized.getVariable(2) instanceof ContinuousVariable);
    assertTrue(discretized.getVariable(3) instanceof ContinuousVariable);
    assertTrue(discretized.getVariable(4) instanceof ContinuousVariable);
  }
Esempio n. 9
0
  @Test
  public void test5() {
    RandomUtil.getInstance().setSeed(29999483L);

    List<Node> nodes = new ArrayList<>();

    for (int i1 = 0; i1 < 5; i1++) {
      nodes.add(new ContinuousVariable("X" + (i1 + 1)));
    }

    Graph graph = new Dag(GraphUtils.randomGraph(nodes, 0, 5, 30, 15, 15, false));
    SemPm semPm = new SemPm(graph);
    SemIm semIm = new SemIm(semPm);

    semIm.simulateDataReducedForm(1000, false);

    GeneralizedSemPm pm = new GeneralizedSemPm(semPm);
    GeneralizedSemIm im = new GeneralizedSemIm(pm, semIm);

    TetradVector e = new TetradVector(5);

    for (int i = 0; i < e.size(); i++) {
      e.set(i, RandomUtil.getInstance().nextNormal(0, 1));
    }

    TetradVector record1 = semIm.simulateOneRecord(e);
    TetradVector record2 = im.simulateOneRecord(e);

    print("XXX1" + e);
    print("XXX2" + record1);
    print("XXX3" + record2);

    for (int i = 0; i < record1.size(); i++) {
      assertEquals(record1.get(i), record2.get(i), 1e-10);
    }
  }
Esempio n. 10
0
  private String reportIfContinuous(Graph dag, DataSet dataSet) {
    SemPm semPm = new SemPm(dag);

    SemEstimator estimator = new SemEstimator(dataSet, semPm);
    estimator.estimate();
    SemIm semIm = estimator.getEstimatedSem();

    NumberFormat nf = NumberFormat.getInstance();
    nf.setMaximumFractionDigits(4);

    StringBuilder buf = new StringBuilder();
    buf.append("\nDegrees of Freedom = ")
        .append(semPm.getDof())
        .append("Chi-Square = ")
        .append(nf.format(semIm.getChiSquare()))
        .append("\nP Value = ")
        .append(nf.format(semIm.getPValue()))
        .append("\nBIC Score = ")
        .append(nf.format(semIm.getBicScore()));

    buf.append(
        "\n\nThe above chi square test assumes that the maximum "
            + "likelihood function over the measured variables has been "
            + "maximized. Under that assumption, the null hypothesis for "
            + "the test is that the population covariance matrix over all "
            + "of the measured variables is equal to the estimated covariance "
            + "matrix over all of the measured variables written as a function "
            + "of the free model parameters--that is, the unfixed parameters "
            + "for each directed edge (the linear coefficient for that edge), "
            + "each exogenous variable (the variance for the error term for "
            + "that variable), and each bidirected edge (the covariance for "
            + "the exogenous variables it connects).  The model is explained "
            + "in Bollen, Structural Equations with Latent Variable, 110. ");

    return buf.toString();
  }
Esempio n. 11
0
  public void test1() {
    for (int r = 0; r < 1; r++) {
      Graph mim = DataGraphUtils.randomSingleFactorModel(5, 5, 6, 0, 0, 0);

      Graph mimStructure = structure(mim);

      SemImInitializationParams params = new SemImInitializationParams();
      params.setCoefRange(.5, 1.5);

      SemPm pm = new SemPm(mim);
      SemIm im = new SemIm(pm, params);
      DataSet data = im.simulateData(300, false);

      String algorithm = "FOFC";
      Graph searchGraph;
      List<List<Node>> partition;

      if (algorithm.equals("FOFC")) {
        FindOneFactorClusters fofc =
            new FindOneFactorClusters(data, TestType.TETRAD_WISHART, 0.001);
        searchGraph = fofc.search();
        partition = fofc.getClusters();
      } else if (algorithm.equals("BPC")) {
        TestType testType = TestType.TETRAD_WISHART;
        TestType purifyType = TestType.TETRAD_BASED2;

        BuildPureClusters bpc = new BuildPureClusters(data, 0.001, testType, purifyType);
        searchGraph = bpc.search();

        partition = MimUtils.convertToClusters2(searchGraph);
      } else {
        throw new IllegalStateException();
      }

      List<String> latentVarList = reidentifyVariables(mim, data, partition, 2);

      System.out.println(partition);
      System.out.println(latentVarList);

      System.out.println("True\n" + mimStructure);

      Graph mimbuildStructure;

      for (int mimbuildMethod : new int[] {3, 4}) {
        if (mimbuildMethod == 1) {
          System.out.println("Mimbuild 1\n");
          Clusters measurements = ClusterUtils.mimClusters(searchGraph);
          IndTestMimBuild test = new IndTestMimBuild(data, 0.001, measurements);
          MimBuild mimbuild = new MimBuild(test, new Knowledge2());
          Graph full = mimbuild.search();
          full = changeLatentNames(full, measurements, latentVarList);
          mimbuildStructure = structure(full);
          System.out.println(
              "SHD = "
                  + SearchGraphUtils.structuralHammingDistance(mimStructure, mimbuildStructure));
          System.out.println("Estimated\n" + mimbuildStructure);
          System.out.println();
        }
        //                else if (mimbuildMethod == 2) {
        //                    System.out.println("Mimbuild 2\n");
        //                    Mimbuild2 mimbuild = new Mimbuild2();
        //                    mimbuild.setAlpha(0.001);
        //                    mimbuildStructure = mimbuild.search(partition, latentVarList, data);
        //                    TetradMatrix latentcov = mimbuild.getLatentsCov();
        //                    List<String> latentnames = mimbuild.getLatentNames();
        //                    System.out.println("\nCovariance over the latents");
        //                    System.out.println(MatrixUtils.toStringSquare(latentcov.toArray(),
        // latentnames));
        //                    System.out.println("Estimated\n" + mimbuildStructure);
        //                    System.out.println("SHD = " +
        // SearchGraphUtils.structuralHammingDistance(mimStructure, mimbuildStructure));
        //                    System.out.println();
        //                }
        else if (mimbuildMethod == 3) {
          System.out.println("Mimbuild 3\n");
          Mimbuild2 mimbuild = new Mimbuild2();
          mimbuild.setAlpha(0.001);
          mimbuild.setMinClusterSize(3);
          mimbuildStructure = mimbuild.search(partition, latentVarList, new CovarianceMatrix(data));
          ICovarianceMatrix latentcov = mimbuild.getLatentsCov();
          System.out.println("\nCovariance over the latents");
          System.out.println(latentcov);
          System.out.println("Estimated\n" + mimbuildStructure);
          System.out.println(
              "SHD = "
                  + SearchGraphUtils.structuralHammingDistance(mimStructure, mimbuildStructure));
          System.out.println();
        } else if (mimbuildMethod == 4) {
          System.out.println("Mimbuild Trek\n");
          MimbuildTrek mimbuild = new MimbuildTrek();
          mimbuild.setAlpha(0.1);
          mimbuild.setMinClusterSize(3);
          mimbuildStructure = mimbuild.search(partition, latentVarList, new CovarianceMatrix(data));
          ICovarianceMatrix latentcov = mimbuild.getLatentsCov();
          System.out.println("\nCovariance over the latents");
          System.out.println(latentcov);
          System.out.println("Estimated\n" + mimbuildStructure);
          System.out.println(
              "SHD = "
                  + SearchGraphUtils.structuralHammingDistance(mimStructure, mimbuildStructure));
          System.out.println();
        } else {
          throw new IllegalStateException();
        }
      }
    }
  }
Esempio n. 12
0
  public void rtest4() {
    System.out.println("SHD\tP");
    //        System.out.println("MB1\tMB2\tMB3\tMB4\tMB5\tMB6");

    Graph mim = DataGraphUtils.randomSingleFactorModel(5, 5, 8, 0, 0, 0);

    Graph mimStructure = structure(mim);

    SemPm pm = new SemPm(mim);
    SemImInitializationParams params = new SemImInitializationParams();
    params.setCoefRange(0.5, 1.5);

    NumberFormat nf = new DecimalFormat("0.0000");

    int totalError = 0;
    int errorCount = 0;

    int maxScore = 0;
    int maxNumMeasures = 0;
    double maxP = 0.0;

    for (int r = 0; r < 1; r++) {
      SemIm im = new SemIm(pm, params);

      DataSet data = im.simulateData(1000, false);

      mim = GraphUtils.replaceNodes(mim, data.getVariables());
      List<List<Node>> trueClusters = MimUtils.convertToClusters2(mim);

      CovarianceMatrix _cov = new CovarianceMatrix(data);

      ICovarianceMatrix cov = DataUtils.reorderColumns(_cov);

      String algorithm = "FOFC";
      Graph searchGraph;
      List<List<Node>> partition;

      if (algorithm.equals("FOFC")) {
        FindOneFactorClusters fofc =
            new FindOneFactorClusters(cov, TestType.TETRAD_WISHART, 0.001f);
        searchGraph = fofc.search();
        searchGraph = GraphUtils.replaceNodes(searchGraph, data.getVariables());
        partition = MimUtils.convertToClusters2(searchGraph);
      } else if (algorithm.equals("BPC")) {
        TestType testType = TestType.TETRAD_WISHART;
        TestType purifyType = TestType.TETRAD_BASED2;

        BuildPureClusters bpc = new BuildPureClusters(data, 0.001, testType, purifyType);
        searchGraph = bpc.search();

        partition = MimUtils.convertToClusters2(searchGraph);
      } else {
        throw new IllegalStateException();
      }

      mimStructure = GraphUtils.replaceNodes(mimStructure, data.getVariables());

      List<String> latentVarList = reidentifyVariables(mim, data, partition, 2);

      Graph mimbuildStructure;

      Mimbuild2 mimbuild = new Mimbuild2();
      mimbuild.setAlpha(0.001);
      mimbuild.setMinClusterSize(3);

      try {
        mimbuildStructure = mimbuild.search(partition, latentVarList, cov);
      } catch (Exception e) {
        e.printStackTrace();
        continue;
      }

      mimbuildStructure = GraphUtils.replaceNodes(mimbuildStructure, data.getVariables());
      mimbuildStructure = condense(mimStructure, mimbuildStructure);

      //            Graph mimSubgraph = new EdgeListGraph(mimStructure);
      //
      //            for (Node node : mimSubgraph.getNodes()) {
      //                if (!mimStructure.getNodes().contains(node)) {
      //                    mimSubgraph.removeNode(node);
      //                }
      //            }

      int shd = SearchGraphUtils.structuralHammingDistance(mimStructure, mimbuildStructure);
      boolean impureCluster = containsImpureCluster(partition, trueClusters);
      double pValue = mimbuild.getpValue();
      boolean pBelow05 = pValue < 0.05;
      boolean numClustersGreaterThan5 = partition.size() != 5;
      boolean error = false;

      //            boolean condition = impureCluster || numClustersGreaterThan5 || pBelow05;
      //            boolean condition = numClustersGreaterThan5 || pBelow05;
      boolean condition = numClustered(partition) == 40;

      if (!condition && (shd > 5)) {
        error = true;
      }

      if (!condition) {
        totalError += shd;
        errorCount++;
      }

      //            if (numClustered(partition) > maxNumMeasures) {
      //                maxNumMeasures = numClustered(partition);
      //                maxP = pValue;
      //                maxScore = shd;
      //                System.out.println("maxNumMeasures = " + maxNumMeasures);
      //                System.out.println("maxScore = " + maxScore);
      //                System.out.println("maxP = " + maxP);
      //                System.out.println("clusters = " + clusterSizes(partition, trueClusters));
      //            }
      //            else
      if (pValue > maxP) {
        maxScore = shd;
        maxP = mimbuild.getpValue();
        maxNumMeasures = numClustered(partition);
        System.out.println("maxNumMeasures = " + maxNumMeasures);
        System.out.println("maxScore = " + maxScore);
        System.out.println("maxP = " + maxP);
        System.out.println("clusters = " + clusterSizes(partition, trueClusters));
      }

      System.out.print(
          shd
              + "\t"
              + nf.format(pValue)
              + " "
              //                            + (error ? 1 : 0) + " "
              //                            + (pBelow05 ? "P < 0.05 " : "")
              //                            + (impureCluster ? "Impure cluster " : "")
              //                            + (numClustersGreaterThan5 ? "# Clusters != 5 " : "")
              //                            + clusterSizes(partition, trueClusters)
              + numClustered(partition));

      System.out.println();
    }

    System.out.println("\nAvg SHD for not-flagged cases = " + (totalError / (double) errorCount));

    System.out.println("maxNumMeasures = " + maxNumMeasures);
    System.out.println("maxScore = " + maxScore);
    System.out.println("maxP = " + maxP);
  }
Esempio n. 13
0
  public void rtest3() {
    Node x = new GraphNode("X");
    Node y = new GraphNode("Y");
    Node z = new GraphNode("Z");
    Node w = new GraphNode("W");

    List<Node> nodes = new ArrayList<Node>();
    nodes.add(x);
    nodes.add(y);
    nodes.add(z);
    nodes.add(w);

    Graph g = new EdgeListGraph(nodes);
    g.addDirectedEdge(x, y);
    g.addDirectedEdge(x, z);
    g.addDirectedEdge(y, w);
    g.addDirectedEdge(z, w);

    Graph maxGraph = null;
    double maxPValue = -1.0;
    ICovarianceMatrix maxLatentCov = null;

    Graph mim = DataGraphUtils.randomMim(g, 8, 0, 0, 0, true);
    //        Graph mim = DataGraphUtils.randomSingleFactorModel(5, 5, 8, 0, 0, 0);
    Graph mimStructure = structure(mim);
    SemPm pm = new SemPm(mim);

    System.out.println("\n\nTrue graph:");
    System.out.println(mimStructure);

    SemImInitializationParams params = new SemImInitializationParams();
    params.setCoefRange(0.5, 1.5);

    SemIm im = new SemIm(pm, params);

    int N = 1000;

    DataSet data = im.simulateData(N, false);

    CovarianceMatrix cov = new CovarianceMatrix(data);

    for (int i = 0; i < 1; i++) {

      ICovarianceMatrix _cov = DataUtils.reorderColumns(cov);
      List<List<Node>> partition;

      FindOneFactorClusters fofc = new FindOneFactorClusters(_cov, TestType.TETRAD_WISHART, .001);
      fofc.search();
      partition = fofc.getClusters();
      System.out.println(partition);

      List<String> latentVarList = reidentifyVariables(mim, data, partition, 2);

      Mimbuild2 mimbuild = new Mimbuild2();

      mimbuild.setAlpha(0.001);
      //            mimbuild.setMinimumSize(5);

      // To test knowledge.
      //            Knowledge knowledge = new Knowledge2();
      //            knowledge.setEdgeForbidden("L.Y", "L.W", true);
      //            knowledge.setEdgeRequired("L.Y", "L.Z", true);
      //            mimbuild.setKnowledge(knowledge);

      Graph mimbuildStructure = mimbuild.search(partition, latentVarList, _cov);

      double pValue = mimbuild.getpValue();
      System.out.println(mimbuildStructure);
      System.out.println("P = " + pValue);
      System.out.println("Latent Cov = " + mimbuild.getLatentsCov());

      if (pValue > maxPValue) {
        maxPValue = pValue;
        maxGraph = new EdgeListGraph(mimbuildStructure);
        maxLatentCov = mimbuild.getLatentsCov();
      }
    }

    System.out.println("\n\nTrue graph:");
    System.out.println(mimStructure);
    System.out.println("\nBest graph:");
    System.out.println(maxGraph);
    System.out.println("P = " + maxPValue);
    System.out.println("Latent Cov = " + maxLatentCov);
    System.out.println();
  }
Esempio n. 14
0
  @Test
  public void testHistogram() {
    RandomUtil.getInstance().setSeed(4829384L);

    List<Node> nodes = new ArrayList<Node>();

    for (int i = 0; i < 5; i++) {
      nodes.add(new ContinuousVariable("X" + (i + 1)));
    }

    Dag trueGraph = new Dag(GraphUtils.randomGraph(nodes, 0, 5, 30, 15, 15, false));
    int sampleSize = 1000;

    // Continuous
    SemPm semPm = new SemPm(trueGraph);
    SemIm semIm = new SemIm(semPm);
    DataSet data = semIm.simulateData(sampleSize, false);

    Histogram histogram = new Histogram(data);
    histogram.setTarget("X1");
    histogram.setNumBins(20);

    assertEquals(3.76, histogram.getMax(), 0.01);
    assertEquals(-3.83, histogram.getMin(), 0.01);
    assertEquals(1000, histogram.getN());

    histogram.setTarget("X1");
    histogram.setNumBins(10);
    histogram.addConditioningVariable("X3", 0, 1);
    histogram.addConditioningVariable("X4", 0, 1);

    histogram.removeConditioningVariable("X3");

    assertEquals(3.76, histogram.getMax(), 0.01);
    assertEquals(-3.83, histogram.getMin(), 0.01);
    assertEquals(188, histogram.getN());

    double[] arr = histogram.getContinuousData("X2");
    histogram.addConditioningVariable("X2", StatUtils.min(arr), StatUtils.mean(arr));

    // Discrete
    BayesPm bayesPm = new BayesPm(trueGraph);
    BayesIm bayesIm = new MlBayesIm(bayesPm, MlBayesIm.RANDOM);
    DataSet data2 = bayesIm.simulateData(sampleSize, false);

    // For some reason these are giving different
    // values when all of the unit tests are run are
    // once. TODO They produce stable values when
    // this particular test is run repeatedly.
    Histogram histogram2 = new Histogram(data2);
    histogram2.setTarget("X1");
    int[] frequencies1 = histogram2.getFrequencies();
    //        assertEquals(928, frequencies1[0]);
    //        assertEquals(72, frequencies1[1]);

    histogram2.setTarget("X1");
    histogram2.addConditioningVariable("X2", 0);
    histogram2.addConditioningVariable("X3", 1);
    int[] frequencies = histogram2.getFrequencies();
    //        assertEquals(377, frequencies[0]);
    //        assertEquals(28, frequencies[1]);
  }
Esempio n. 15
0
  private String compileReport() {
    StringBuilder builder = new StringBuilder();

    builder.append("Datset\tFrom\tTo\tType\tValue\tSE\tT\tP");

    java.util.List<SemEstimator> estimators = wrapper.getMultipleResultList();

    for (int i = 0; i < estimators.size(); i++) {
      SemEstimator estimator = estimators.get(i);

      SemIm estSem = estimator.getEstimatedSem();
      String dataName = estimator.getDataSet().getName();

      for (Parameter parameter : estSem.getFreeParameters()) {
        builder.append("\n");
        builder.append(dataName + "\t");
        builder.append(parameter.getNodeA() + "\t");
        builder.append(parameter.getNodeB() + "\t");
        builder.append(typeString(parameter) + "\t");
        builder.append(asString(paramValue(estSem, parameter)) + "\t");
        /*
         Maximum number of free parameters for which statistics will be
         calculated. (Calculating standard errors is high complexity.) Set this to
         zero to turn  off statistics calculations (which can be problematic
         sometimes).
        */
        int maxFreeParamsForStatistics = 200;
        builder.append(
            asString(estSem.getStandardError(parameter, maxFreeParamsForStatistics)) + "\t");
        builder.append(asString(estSem.getTValue(parameter, maxFreeParamsForStatistics)) + "\t");
        builder.append(asString(estSem.getPValue(parameter, maxFreeParamsForStatistics)) + "\t");
      }

      List<Node> nodes = estSem.getVariableNodes();

      for (int j = 0; j < nodes.size(); j++) {
        Node node = nodes.get(j);

        int n = estSem.getSampleSize();
        int df = n - 1;
        double mean = estSem.getMean(node);
        double stdDev = estSem.getMeanStdDev(node);
        double stdErr = stdDev / Math.sqrt(n);

        double tValue = mean / stdErr;
        double p = 2.0 * (1.0 - ProbUtils.tCdf(Math.abs(tValue), df));

        builder.append("\n");
        builder.append(dataName + "\t");
        builder.append(nodes.get(j) + "\t");
        builder.append(nodes.get(j) + "\t");
        builder.append("Mean" + "\t");
        builder.append(asString(mean) + "\t");
        builder.append(asString(stdErr) + "\t");
        builder.append(asString(tValue) + "\t");
        builder.append(asString(p) + "\t");
      }
    }

    return builder.toString();
  }