Ejemplo n.º 1
0
  /**
   * Simulates data from a linear SEM and from the generalized SEM built on top of it, then checks
   * column by column that the two data sets have similar means (within 0.3) and that the ratio of
   * their variances is close to 1 (within 0.2).
   */
  @Test
  public void test2() {
    RandomUtil.getInstance().setSeed(2999983L);

    int sampleSize = 1000;

    ContinuousVariable x1 = new ContinuousVariable("X1");
    ContinuousVariable x2 = new ContinuousVariable("X2");
    ContinuousVariable x3 = new ContinuousVariable("X3");
    ContinuousVariable x4 = new ContinuousVariable("X4");
    ContinuousVariable x5 = new ContinuousVariable("X5");

    List<Node> variableNodes = new ArrayList<>();
    variableNodes.add(x1);
    variableNodes.add(x2);
    variableNodes.add(x3);
    variableNodes.add(x4);
    variableNodes.add(x5);

    // Build the model graph over the five variables.
    Graph baseGraph = new EdgeListGraph(variableNodes);
    SemGraph graph = new SemGraph(baseGraph);
    graph.addDirectedEdge(x1, x3);
    graph.addDirectedEdge(x2, x3);
    graph.addDirectedEdge(x3, x4);
    graph.addDirectedEdge(x2, x4);
    graph.addDirectedEdge(x4, x5);
    graph.addDirectedEdge(x2, x5);

    // Linear model and its simulated data.
    SemPm linearPm = new SemPm(graph);
    SemIm linearIm = new SemIm(linearPm);
    DataSet linearData = linearIm.simulateData(sampleSize, false);

    print(linearPm);

    // Generalized model derived from the linear one, and its simulated data.
    GeneralizedSemPm generalizedPm = new GeneralizedSemPm(linearPm);
    GeneralizedSemIm generalizedIm = new GeneralizedSemIm(generalizedPm, linearIm);
    DataSet generalizedData = generalizedIm.simulateDataMinimizeSurface(sampleSize, false);

    print(generalizedPm);

    // Compare the first two moments of every column across the two data sets.
    int column = 0;
    while (column < linearData.getNumColumns()) {
      double[] linearColumn = linearData.getDoubleData().getColumn(column).toArray();
      double[] generalizedColumn = generalizedData.getDoubleData().getColumn(column).toArray();

      double linearMean = StatUtils.mean(linearColumn);
      double generalizedMean = StatUtils.mean(generalizedColumn);

      double linearVariance = StatUtils.variance(linearColumn);
      double generalizedVariance = StatUtils.variance(generalizedColumn);

      assertEquals(linearMean, generalizedMean, 0.3);
      assertEquals(1.0, linearVariance / generalizedVariance, .2);

      column++;
    }
  }
Ejemplo n.º 2
0
  /**
   * Scores node {@code y} against the candidate {@code parents} using the measure selected by the
   * {@code score} field.
   *
   * <p>For the kurtosis, skew and fifth-moment measures the result is the absolute value of the
   * corresponding statistic of {@code residual(y, parents)}; the other two measures delegate to
   * dedicated helpers.
   *
   * @param y the node being scored.
   * @param parents the candidate parents of {@code y}.
   * @return the score under the configured measure.
   * @throws IllegalStateException if the configured score type is none of the handled values.
   */
  private double score(Node y, List<Node> parents) {
    if (score == Score.andersonDarling) {
      return andersonDarlingPASquareStar(y, parents);
    }

    if (score == Score.kurtosis) {
      return Math.abs(StatUtils.kurtosis(residual(y, parents)));
    }

    if (score == Score.skew) {
      return Math.abs(StatUtils.skewness(residual(y, parents)));
    }

    if (score == Score.fifthMoment) {
      return Math.abs(StatUtils.standardizedFifthMoment(residual(y, parents)));
    }

    if (score == Score.absoluteValue) {
      return localScoreA(y, parents);
    }

    // Name the offending value so a misconfiguration can be diagnosed from the stack trace alone.
    throw new IllegalStateException("Unsupported score type: " + score);
  }
  /**
   * Tests whether {@code x} is independent of {@code y} given {@code z} by computing a Fisher Z
   * p-value from each matrix in {@code ncov} and comparing one order statistic of those p-values
   * (selected by {@code percent}) against the cutoff.
   *
   * <p>The cutoff is {@code alpha}, or the value returned by {@code StatUtils.fdrCutoff} when
   * {@code fdr} is set. The selected p-value is stored in {@code this.pValue} as a side effect.
   *
   * @param x the first variable being compared.
   * @param y the second variable being compared.
   * @param z the list of conditioning variables.
   * @return true if the selected p-value is greater than the cutoff.
   */
  public boolean isIndependent(Node x, Node y, List<Node> z) {
    // Indices of x, y and then each conditioning variable, used to select the relevant submatrix.
    int[] all = new int[z.size() + 2];
    all[0] = variablesMap.get(x);
    all[1] = variablesMap.get(y);
    for (int i = 0; i < z.size(); i++) {
      all[i + 2] = variablesMap.get(z.get(i));
    }

    int sampleSize = data.get(0).rows();
    List<Double> pValues = new ArrayList<>();

    for (int m = 0; m < ncov.size(); m++) {
      TetradMatrix _ncov = ncov.get(m).getSelection(all, all);
      TetradMatrix inv = _ncov.inverse();

      // Partial correlation of the first two selected variables, read off the inverse matrix.
      double r = -inv.get(0, 1) / sqrt(inv.get(0, 0) * inv.get(1, 1));

      double fisherZ =
          sqrt(sampleSize - z.size() - 3.0) * 0.5 * (Math.log(1.0 + r) - Math.log(1.0 - r));
      double pValue;

      if (Double.isInfinite(fisherZ)) {
        // |r| == 1 makes the statistic infinite; treat that as maximal evidence of dependence.
        pValue = 0;
      } else {
        pValue = 2.0 * (1.0 - RandomUtil.getInstance().normalCdf(0, 1, abs(fisherZ)));
      }

      pValues.add(pValue);
    }

    double _cutoff = alpha;

    if (fdr) {
      // Computed before sorting, exactly as the unsorted list is handed over with 'false'.
      _cutoff = StatUtils.fdrCutoff(alpha, pValues, false);
    }

    Collections.sort(pValues);

    // Pick the p-value at the requested percentile. The raw index equals pValues.size() when
    // percent == 0 (and is negative when percent > 1), so clamp it to the valid range instead of
    // failing with an IndexOutOfBoundsException.
    int index = (int) round((1.0 - percent) * pValues.size());
    index = Math.max(0, Math.min(index, pValues.size() - 1));
    this.pValue = pValues.get(index);

    boolean independent = this.pValue > _cutoff;

    if (verbose) {
      if (independent) {
        TetradLogger.getInstance()
            .log("independencies", SearchLogUtils.independenceFactMsg(x, y, z, getPValue()));
      } else {
        TetradLogger.getInstance()
            .log("dependencies", SearchLogUtils.dependenceFactMsg(x, y, z, getPValue()));
      }
    }

    return independent;
  }
  /**
   * Determines whether variable x is independent of variable y given a list of conditioning
   * variables z, using a Fisher Z statistic computed from the partial correlation.
   *
   * <p>If the partial correlation for the full conditioning set cannot be used (computing it throws,
   * or the value is NaN or outside [-1, 1]), reduced conditioning sets produced by a {@code
   * DepthChoiceGenerator} are tried until one yields a value in range. If the correlation is still
   * NaN afterwards, the method returns {@code true} without updating {@code fisherZ} or logging.
   *
   * <p>Side effect: stores the computed statistic in {@code this.fisherZ}.
   *
   * @param x the one variable being compared.
   * @param y the second variable being compared.
   * @param z the list of conditioning variables.
   * @return true iff x _||_ y | z.
   * @throws IllegalArgumentException if the resulting Fisher Z statistic is NaN.
   */
  public boolean isIndependent(Node x, Node y, List<Node> z) {
    TetradMatrix submatrix = subMatrix(x, y, z);
    double r = 0;
    boolean usable = false;

    try {
      r = StatUtils.partialCorrelation(submatrix);
      usable = !(Double.isNaN(r) || r < -1. || r > 1.);
    } catch (Exception e) {
      // Deliberately not rethrown: a failed computation is handled by the fallback search below.
    }

    if (!usable) {
      DepthChoiceGenerator gen = new DepthChoiceGenerator(z.size(), z.size());
      int[] choice;

      while ((choice = gen.next()) != null) {
        try {
          // Drop the chosen conditioning variables and retry with the reduced set.
          List<Node> z2 = new ArrayList<>(z);
          z2.removeAll(GraphUtils.asList(choice, z));
          r = StatUtils.partialCorrelation(subMatrix(x, y, z2));
        } catch (Exception e2) {
          continue;
        }

        if (Double.isNaN(r) || r < -1. || r > 1.) {
          continue;
        }

        break;
      }
    }

    // A NaN here means no usable partial correlation was found (for instance a zero standard
    // deviation or, effectively, a regression with multicollinearity); report independence.
    if (Double.isNaN(r)) {
      return true;
    }

    // Clamp any residual out-of-range value left over when the fallback search was exhausted.
    if (r > 1.) {
      r = 1.;
    }
    if (r < -1.) {
      r = -1.;
    }

    // NOTE(review): the statistic uses z.size() even when r came from a reduced conditioning set
    // in the fallback above - confirm that this is intended.
    this.fisherZ =
        Math.sqrt(sampleSize() - z.size() - 3.0) * 0.5 * (Math.log(1.0 + r) - Math.log(1.0 - r));

    if (Double.isNaN(this.fisherZ)) {
      throw new IllegalArgumentException(
          "The Fisher's Z "
              + "score for independence fact "
              + x
              + " _||_ "
              + y
              + " | "
              + z
              + " is undefined. r = "
              + r);
    }

    boolean independent = getPValue() > alpha;

    if (independent) {
      TetradLogger.getInstance()
          .log("independencies", SearchLogUtils.independenceFactMsg(x, y, z, getPValue()));
    } else {
      TetradLogger.getInstance()
          .log("dependencies", SearchLogUtils.dependenceFactMsg(x, y, z, getPValue()));
    }

    return independent;
  }
Ejemplo n.º 5
0
  /**
   * Exercises {@code Histogram} on simulated continuous data (checking the reported range and
   * sample count with and without conditioning variables) and then on simulated discrete data,
   * whose frequency assertions are currently disabled.
   */
  @Test
  public void testHistogram() {
    RandomUtil.getInstance().setSeed(4829384L);

    // Five continuous variables named X1..X5.
    List<Node> nodes = new ArrayList<Node>();
    int number = 1;
    while (number <= 5) {
      nodes.add(new ContinuousVariable("X" + number));
      number++;
    }

    Dag dag = new Dag(GraphUtils.randomGraph(nodes, 0, 5, 30, 15, 15, false));
    int sampleSize = 1000;

    // Continuous case.
    SemPm semPm = new SemPm(dag);
    SemIm semIm = new SemIm(semPm);
    DataSet continuousData = semIm.simulateData(sampleSize, false);

    Histogram continuousHistogram = new Histogram(continuousData);
    continuousHistogram.setTarget("X1");
    continuousHistogram.setNumBins(20);

    assertEquals(3.76, continuousHistogram.getMax(), 0.01);
    assertEquals(-3.83, continuousHistogram.getMin(), 0.01);
    assertEquals(1000, continuousHistogram.getN());

    // Condition on X3 and X4, then drop X3 again so that only X4 restricts the sample.
    continuousHistogram.setTarget("X1");
    continuousHistogram.setNumBins(10);
    continuousHistogram.addConditioningVariable("X3", 0, 1);
    continuousHistogram.addConditioningVariable("X4", 0, 1);

    continuousHistogram.removeConditioningVariable("X3");

    assertEquals(3.76, continuousHistogram.getMax(), 0.01);
    assertEquals(-3.83, continuousHistogram.getMin(), 0.01);
    assertEquals(188, continuousHistogram.getN());

    double[] x2Values = continuousHistogram.getContinuousData("X2");
    continuousHistogram.addConditioningVariable(
        "X2", StatUtils.min(x2Values), StatUtils.mean(x2Values));

    // Discrete case.
    BayesPm bayesPm = new BayesPm(dag);
    BayesIm bayesIm = new MlBayesIm(bayesPm, MlBayesIm.RANDOM);
    DataSet discreteData = bayesIm.simulateData(sampleSize, false);

    // TODO: the frequency assertions below are disabled because they give different values when
    // all of the unit tests are run at once, although they are stable when this particular test
    // is run repeatedly.
    Histogram discreteHistogram = new Histogram(discreteData);
    discreteHistogram.setTarget("X1");
    int[] unconditionedFrequencies = discreteHistogram.getFrequencies();
    //        assertEquals(928, unconditionedFrequencies[0]);
    //        assertEquals(72, unconditionedFrequencies[1]);

    discreteHistogram.setTarget("X1");
    discreteHistogram.addConditioningVariable("X2", 0);
    discreteHistogram.addConditioningVariable("X3", 1);
    int[] conditionedFrequencies = discreteHistogram.getFrequencies();
    //        assertEquals(377, conditionedFrequencies[0]);
    //        assertEquals(28, conditionedFrequencies[1]);
  }
Ejemplo n.º 6
0
  /**
   * Builds a generalized SEM over five variables whose graph contains a cycle (X5 -> X1), assigns
   * nonlinear node expressions and U(0, 1) error expressions, simulates 1000 rows with {@code
   * simulateDataNSteps}, and checks the covariance of the first two data columns.
   *
   * <p>A failure to parse any of the expressions fails the test rather than being printed and
   * ignored, so the covariance assertion can never be skipped silently.
   */
  @Test
  public void test3() {
    RandomUtil.getInstance().setSeed(49293843L);

    List<Node> variableNodes = new ArrayList<>();
    ContinuousVariable x1 = new ContinuousVariable("X1");
    ContinuousVariable x2 = new ContinuousVariable("X2");
    ContinuousVariable x3 = new ContinuousVariable("X3");
    ContinuousVariable x4 = new ContinuousVariable("X4");
    ContinuousVariable x5 = new ContinuousVariable("X5");

    variableNodes.add(x1);
    variableNodes.add(x2);
    variableNodes.add(x3);
    variableNodes.add(x4);
    variableNodes.add(x5);

    Graph _graph = new EdgeListGraph(variableNodes);
    SemGraph graph = new SemGraph(_graph);
    graph.setShowErrorTerms(true);

    // Error nodes, needed below to give each one its own distribution expression.
    Node e1 = graph.getExogenous(x1);
    Node e2 = graph.getExogenous(x2);
    Node e3 = graph.getExogenous(x3);
    Node e4 = graph.getExogenous(x4);
    Node e5 = graph.getExogenous(x5);

    graph.addDirectedEdge(x1, x3);
    graph.addDirectedEdge(x1, x2);
    graph.addDirectedEdge(x2, x3);
    graph.addDirectedEdge(x3, x4);
    graph.addDirectedEdge(x2, x4);
    graph.addDirectedEdge(x4, x5);
    graph.addDirectedEdge(x2, x5);
    graph.addDirectedEdge(x5, x1);

    GeneralizedSemPm pm = new GeneralizedSemPm(graph);

    List<Node> variablesNodes = pm.getVariableNodes();
    print(variablesNodes);

    List<Node> errorNodes = pm.getErrorNodes();
    print(errorNodes);

    try {
      pm.setNodeExpression(x1, "cos(b1) + a1 * X5 + E_X1");
      pm.setNodeExpression(x2, "a2 * X1 + E_X2");
      pm.setNodeExpression(x3, "tan(a3*X2 + a4*X1) + E_X3");
      pm.setNodeExpression(x4, "0.1 * E^X2 + X3 + E_X4");
      pm.setNodeExpression(x5, "0.1 * E^X4 + a6* X2 + E_X5");
      pm.setNodeExpression(e1, "U(0, 1)");
      pm.setNodeExpression(e2, "U(0, 1)");
      pm.setNodeExpression(e3, "U(0, 1)");
      pm.setNodeExpression(e4, "U(0, 1)");
      pm.setNodeExpression(e5, "U(0, 1)");

      GeneralizedSemIm im = new GeneralizedSemIm(pm);

      print(im);

      DataSet dataSet = im.simulateDataNSteps(1000, false);

      double[] d1 = dataSet.getDoubleData().getColumn(0).toArray();
      double[] d2 = dataSet.getDoubleData().getColumn(1).toArray();

      double cov = StatUtils.covariance(d1, d2);

      assertEquals(-0.002, cov, 0.001);
    } catch (ParseException e) {
      // Printing and continuing would let the test pass without ever reaching the assertion.
      throw new AssertionError("Failed to parse a node expression: " + e.getMessage(), e);
    }
  }