/** * Calculates the sample likelihood and BIC score for i given its parents in a simple SEM model. */ private double localSemScore(int i, int[] parents) { try { ICovarianceMatrix cov = getCovMatrix(); double varianceY = cov.getValue(i, i); double residualVariance = varianceY; int n = sampleSize(); int p = parents.length; int k = (p * (p + 1)) / 2 + p; // int k = (p + 1) * (p + 1); // int k = p + 1; TetradMatrix covxx = cov.getSelection(parents, parents); TetradMatrix covxxInv = covxx.inverse(); TetradVector covxy = cov.getSelection(parents, new int[] {i}).getColumn(0); TetradVector b = covxxInv.times(covxy); residualVariance -= covxy.dotProduct(b); if (residualVariance <= 0 && verbose) { out.println( "Nonpositive residual varianceY: resVar / varianceY = " + (residualVariance / varianceY)); return Double.NaN; } double c = getPenaltyDiscount(); // return -n * log(residualVariance) - 2 * k; //AIC return -n * Math.log(residualVariance) - c * k * Math.log(n); // return -n * log(residualVariance) - c * k * (log(n) - log(2 * PI)); } catch (Exception e) { e.printStackTrace(); throw new RuntimeException(e); // throwMinimalLinearDependentSet(parents, cov); } }
private double pValue(Node node, List<Node> parents) { List<Double> _residuals = new ArrayList<Double>(); Node _target = node; List<Node> _regressors = parents; Node target = getVariable(variables, _target.getName()); List<Node> regressors = new ArrayList<Node>(); for (Node _regressor : _regressors) { Node variable = getVariable(variables, _regressor.getName()); regressors.add(variable); } DATASET: for (int m = 0; m < dataSets.size(); m++) { RegressionResult result = regressions.get(m).regress(target, regressors); TetradVector residualsSingleDataset = result.getResiduals(); for (int h = 0; h < residualsSingleDataset.size(); h++) { if (Double.isNaN(residualsSingleDataset.get(h))) { continue DATASET; } } DoubleArrayList _residualsSingleDataset = new DoubleArrayList(residualsSingleDataset.toArray()); double mean = Descriptive.mean(_residualsSingleDataset); double std = Descriptive.standardDeviation( Descriptive.variance( _residualsSingleDataset.size(), Descriptive.sum(_residualsSingleDataset), Descriptive.sumOfSquares(_residualsSingleDataset))); for (int i2 = 0; i2 < _residualsSingleDataset.size(); i2++) { // _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean) / // std); if (isMeanCenterResiduals()) { _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean)); } // _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2))); } for (int k = 0; k < _residualsSingleDataset.size(); k++) { _residuals.add(_residualsSingleDataset.get(k)); } } double[] _f = new double[_residuals.size()]; for (int k = 0; k < _residuals.size(); k++) { _f[k] = _residuals.get(k); } return new AndersonDarlingTest(_f).getP(); }
/**
 * Verifies that a {@code GeneralizedSemIm} constructed from a {@code SemIm} simulates the same
 * record as that {@code SemIm} when both are given the same error vector (tolerance 1e-10).
 */
@Test
public void test5() {
    RandomUtil.getInstance().setSeed(29999483L);

    // Five continuous variables named X1..X5.
    List<Node> vars = new ArrayList<>();
    int label = 1;
    while (label <= 5) {
        vars.add(new ContinuousVariable("X" + label));
        label++;
    }

    Graph dag = new Dag(GraphUtils.randomGraph(vars, 0, 5, 30, 15, 15, false));

    SemPm linearPm = new SemPm(dag);
    SemIm linearIm = new SemIm(linearPm);
    linearIm.simulateDataReducedForm(1000, false);

    GeneralizedSemPm generalPm = new GeneralizedSemPm(linearPm);
    GeneralizedSemIm generalIm = new GeneralizedSemIm(generalPm, linearIm);

    // Shared error vector fed to both models.
    TetradVector errors = new TetradVector(5);
    for (int j = 0; j < errors.size(); j++) {
        double draw = RandomUtil.getInstance().nextNormal(0, 1);
        errors.set(j, draw);
    }

    TetradVector fromLinear = linearIm.simulateOneRecord(errors);
    TetradVector fromGeneral = generalIm.simulateOneRecord(errors);

    print("XXX1" + errors);
    print("XXX2" + fromLinear);
    print("XXX3" + fromGeneral);

    for (int j = 0; j < fromLinear.size(); j++) {
        double expected = fromLinear.get(j);
        double actual = fromGeneral.get(j);
        assertEquals(expected, actual, 1e-10);
    }
}
private double andersonDarlingPASquareStarB(Node node, List<Node> parents) { List<Double> _residuals = new ArrayList<Double>(); Node _target = node; List<Node> _regressors = parents; Node target = getVariable(variables, _target.getName()); List<Node> regressors = new ArrayList<Node>(); for (Node _regressor : _regressors) { Node variable = getVariable(variables, _regressor.getName()); regressors.add(variable); } double sum = 0.0; DATASET: for (int m = 0; m < dataSets.size(); m++) { RegressionResult result = regressions.get(m).regress(target, regressors); TetradVector residualsSingleDataset = result.getResiduals(); for (int h = 0; h < residualsSingleDataset.size(); h++) { if (Double.isNaN(residualsSingleDataset.get(h))) { continue DATASET; } } DoubleArrayList _residualsSingleDataset = new DoubleArrayList(residualsSingleDataset.toArray()); double mean = Descriptive.mean(_residualsSingleDataset); double std = Descriptive.standardDeviation( Descriptive.variance( _residualsSingleDataset.size(), Descriptive.sum(_residualsSingleDataset), Descriptive.sumOfSquares(_residualsSingleDataset))); // By centering the individual residual columns, all moments of the mixture become weighted // averages of the moments // of the individual columns. // http://en.wikipedia.org/wiki/Mixture_distribution#Finite_and_countable_mixtures for (int i2 = 0; i2 < _residualsSingleDataset.size(); i2++) { // _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean) / // std); // _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2)) / std); if (isMeanCenterResiduals()) { _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean)); } } double[] _f = new double[_residuals.size()]; for (int k = 0; k < _residuals.size(); k++) { _f[k] = _residuals.get(k); } sum += new AndersonDarlingTest(_f).getASquaredStar(); } return sum / dataSets.size(); }
/**
 * Scores {@code node} given {@code parents} by how far the standardized regression residuals
 * depart from the mean absolute value expected of a standard normal variable.
 *
 * <p>For each data set the residuals of {@code node} regressed on {@code parents} are
 * standardized (mean subtracted, divided by the standard deviation), the mean of their absolute
 * values is taken, and the squared difference between that mean and {@code sqrt(2 / pi)} (the
 * expected absolute value of a standard normal variable) is accumulated. The accumulated total
 * is divided by {@code dataSets.size()}.
 *
 * <p>A data set whose residual vector contains a NaN contributes nothing to the total but is
 * still counted in the divisor.
 *
 * @param node the target variable; resolved by name through {@code getVariable}
 * @param parents the regressors; each is resolved by name through {@code getVariable}
 * @return the accumulated squared deviation divided by the number of data sets
 */
private double localScoreB(Node node, List<Node> parents) {
    Node target = getVariable(variables, node.getName());

    List<Node> regressors = new ArrayList<Node>();
    for (Node parent : parents) {
        regressors.add(getVariable(variables, parent.getName()));
    }

    double score = 0.0;

    DATASET:
    for (int m = 0; m < dataSets.size(); m++) {
        RegressionResult result = regressions.get(m).regress(target, regressors);
        TetradVector residuals = result.getResiduals();

        // One NaN residual disqualifies the whole data set.
        for (int h = 0; h < residuals.size(); h++) {
            if (Double.isNaN(residuals.get(h))) {
                continue DATASET;
            }
        }

        DoubleArrayList column = new DoubleArrayList(residuals.toArray());

        double mean = Descriptive.mean(column);
        double std =
                Descriptive.standardDeviation(
                        Descriptive.variance(
                                column.size(),
                                Descriptive.sum(column),
                                Descriptive.sumOfSquares(column)));

        // Absolute values of the standardized residuals, built in a single pass.
        double[] absStandardized = new double[column.size()];
        for (int k = 0; k < absStandardized.length; k++) {
            absStandardized[k] = Math.abs((column.get(k) - mean) / std);
        }

        double meanAbs = Descriptive.mean(new DoubleArrayList(absStandardized));
        double diff = meanAbs - Math.sqrt(2.0 / Math.PI);
        score += diff * diff;
    }

    return score / dataSets.size();
}