Пример #1
0
  /**
   * Calculates the sample likelihood and BIC score for i given its parents in a simple SEM model.
   */
  private double localSemScore(int i, int[] parents) {
    try {
      ICovarianceMatrix cov = getCovMatrix();
      double varianceY = cov.getValue(i, i);
      double residualVariance = varianceY;
      int n = sampleSize();
      int p = parents.length;
      int k = (p * (p + 1)) / 2 + p;
      //            int k = (p + 1) * (p + 1);
      //            int k = p + 1;
      TetradMatrix covxx = cov.getSelection(parents, parents);
      TetradMatrix covxxInv = covxx.inverse();
      TetradVector covxy = cov.getSelection(parents, new int[] {i}).getColumn(0);
      TetradVector b = covxxInv.times(covxy);
      residualVariance -= covxy.dotProduct(b);

      if (residualVariance <= 0 && verbose) {
        out.println(
            "Nonpositive residual varianceY: resVar / varianceY = "
                + (residualVariance / varianceY));
        return Double.NaN;
      }

      double c = getPenaltyDiscount();

      //            return -n * log(residualVariance) - 2 * k; //AIC
      return -n * Math.log(residualVariance) - c * k * Math.log(n);
      //            return -n * log(residualVariance) - c * k * (log(n) - log(2 * PI));
    } catch (Exception e) {
      e.printStackTrace();
      throw new RuntimeException(e);
      //            throwMinimalLinearDependentSet(parents, cov);
    }
  }
Пример #2
0
  private double pValue(Node node, List<Node> parents) {
    List<Double> _residuals = new ArrayList<Double>();

    Node _target = node;
    List<Node> _regressors = parents;
    Node target = getVariable(variables, _target.getName());
    List<Node> regressors = new ArrayList<Node>();

    for (Node _regressor : _regressors) {
      Node variable = getVariable(variables, _regressor.getName());
      regressors.add(variable);
    }

    DATASET:
    for (int m = 0; m < dataSets.size(); m++) {
      RegressionResult result = regressions.get(m).regress(target, regressors);
      TetradVector residualsSingleDataset = result.getResiduals();

      for (int h = 0; h < residualsSingleDataset.size(); h++) {
        if (Double.isNaN(residualsSingleDataset.get(h))) {
          continue DATASET;
        }
      }

      DoubleArrayList _residualsSingleDataset =
          new DoubleArrayList(residualsSingleDataset.toArray());

      double mean = Descriptive.mean(_residualsSingleDataset);
      double std =
          Descriptive.standardDeviation(
              Descriptive.variance(
                  _residualsSingleDataset.size(),
                  Descriptive.sum(_residualsSingleDataset),
                  Descriptive.sumOfSquares(_residualsSingleDataset)));

      for (int i2 = 0; i2 < _residualsSingleDataset.size(); i2++) {
        //                _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean) /
        // std);
        if (isMeanCenterResiduals()) {
          _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean));
        }
        //                _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2)));
      }

      for (int k = 0; k < _residualsSingleDataset.size(); k++) {
        _residuals.add(_residualsSingleDataset.get(k));
      }
    }

    double[] _f = new double[_residuals.size()];

    for (int k = 0; k < _residuals.size(); k++) {
      _f[k] = _residuals.get(k);
    }

    return new AndersonDarlingTest(_f).getP();
  }
Пример #3
0
  @Test
  public void test5() {
    RandomUtil.getInstance().setSeed(29999483L);

    List<Node> nodes = new ArrayList<>();

    for (int i1 = 0; i1 < 5; i1++) {
      nodes.add(new ContinuousVariable("X" + (i1 + 1)));
    }

    Graph graph = new Dag(GraphUtils.randomGraph(nodes, 0, 5, 30, 15, 15, false));
    SemPm semPm = new SemPm(graph);
    SemIm semIm = new SemIm(semPm);

    semIm.simulateDataReducedForm(1000, false);

    GeneralizedSemPm pm = new GeneralizedSemPm(semPm);
    GeneralizedSemIm im = new GeneralizedSemIm(pm, semIm);

    TetradVector e = new TetradVector(5);

    for (int i = 0; i < e.size(); i++) {
      e.set(i, RandomUtil.getInstance().nextNormal(0, 1));
    }

    TetradVector record1 = semIm.simulateOneRecord(e);
    TetradVector record2 = im.simulateOneRecord(e);

    print("XXX1" + e);
    print("XXX2" + record1);
    print("XXX3" + record2);

    for (int i = 0; i < record1.size(); i++) {
      assertEquals(record1.get(i), record2.get(i), 1e-10);
    }
  }
Пример #4
0
  private double andersonDarlingPASquareStarB(Node node, List<Node> parents) {
    List<Double> _residuals = new ArrayList<Double>();

    Node _target = node;
    List<Node> _regressors = parents;
    Node target = getVariable(variables, _target.getName());
    List<Node> regressors = new ArrayList<Node>();

    for (Node _regressor : _regressors) {
      Node variable = getVariable(variables, _regressor.getName());
      regressors.add(variable);
    }

    double sum = 0.0;

    DATASET:
    for (int m = 0; m < dataSets.size(); m++) {
      RegressionResult result = regressions.get(m).regress(target, regressors);
      TetradVector residualsSingleDataset = result.getResiduals();

      for (int h = 0; h < residualsSingleDataset.size(); h++) {
        if (Double.isNaN(residualsSingleDataset.get(h))) {
          continue DATASET;
        }
      }

      DoubleArrayList _residualsSingleDataset =
          new DoubleArrayList(residualsSingleDataset.toArray());

      double mean = Descriptive.mean(_residualsSingleDataset);
      double std =
          Descriptive.standardDeviation(
              Descriptive.variance(
                  _residualsSingleDataset.size(),
                  Descriptive.sum(_residualsSingleDataset),
                  Descriptive.sumOfSquares(_residualsSingleDataset)));

      // By centering the individual residual columns, all moments of the mixture become weighted
      // averages of the moments
      // of the individual columns.
      // http://en.wikipedia.org/wiki/Mixture_distribution#Finite_and_countable_mixtures
      for (int i2 = 0; i2 < _residualsSingleDataset.size(); i2++) {
        //                _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean) /
        // std);
        //                _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2)) / std);
        if (isMeanCenterResiduals()) {
          _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean));
        }
      }

      double[] _f = new double[_residuals.size()];

      for (int k = 0; k < _residuals.size(); k++) {
        _f[k] = _residuals.get(k);
      }

      sum += new AndersonDarlingTest(_f).getASquaredStar();
    }

    return sum / dataSets.size();
  }
Пример #5
0
  private double localScoreB(Node node, List<Node> parents) {

    double score = 0.0;
    double maxScore = Double.NEGATIVE_INFINITY;

    Node _target = node;
    List<Node> _regressors = parents;
    Node target = getVariable(variables, _target.getName());
    List<Node> regressors = new ArrayList<Node>();

    for (Node _regressor : _regressors) {
      Node variable = getVariable(variables, _regressor.getName());
      regressors.add(variable);
    }

    DATASET:
    for (int m = 0; m < dataSets.size(); m++) {
      RegressionResult result = regressions.get(m).regress(target, regressors);
      TetradVector residualsSingleDataset = result.getResiduals();
      DoubleArrayList _residualsSingleDataset =
          new DoubleArrayList(residualsSingleDataset.toArray());

      for (int h = 0; h < residualsSingleDataset.size(); h++) {
        if (Double.isNaN(residualsSingleDataset.get(h))) {
          continue DATASET;
        }
      }

      double mean = Descriptive.mean(_residualsSingleDataset);
      double std =
          Descriptive.standardDeviation(
              Descriptive.variance(
                  _residualsSingleDataset.size(),
                  Descriptive.sum(_residualsSingleDataset),
                  Descriptive.sumOfSquares(_residualsSingleDataset)));

      for (int i2 = 0; i2 < _residualsSingleDataset.size(); i2++) {
        _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean) / std);
      }

      double[] _f = new double[_residualsSingleDataset.size()];

      for (int k = 0; k < _residualsSingleDataset.size(); k++) {
        _f[k] = _residualsSingleDataset.get(k);
      }

      DoubleArrayList f = new DoubleArrayList(_f);

      for (int k = 0; k < f.size(); k++) {
        f.set(k, Math.abs(f.get(k)));
      }

      double _mean = Descriptive.mean(f);
      double diff = _mean - Math.sqrt(2.0 / Math.PI);
      score += diff * diff;

      if (score > maxScore) {
        maxScore = score;
      }
    }

    double avg = score / dataSets.size();

    return avg;
  }