@Test public void test2() { RandomUtil.getInstance().setSeed(2999983L); int sampleSize = 1000; List<Node> variableNodes = new ArrayList<>(); ContinuousVariable x1 = new ContinuousVariable("X1"); ContinuousVariable x2 = new ContinuousVariable("X2"); ContinuousVariable x3 = new ContinuousVariable("X3"); ContinuousVariable x4 = new ContinuousVariable("X4"); ContinuousVariable x5 = new ContinuousVariable("X5"); variableNodes.add(x1); variableNodes.add(x2); variableNodes.add(x3); variableNodes.add(x4); variableNodes.add(x5); Graph _graph = new EdgeListGraph(variableNodes); SemGraph graph = new SemGraph(_graph); graph.addDirectedEdge(x1, x3); graph.addDirectedEdge(x2, x3); graph.addDirectedEdge(x3, x4); graph.addDirectedEdge(x2, x4); graph.addDirectedEdge(x4, x5); graph.addDirectedEdge(x2, x5); SemPm semPm = new SemPm(graph); SemIm semIm = new SemIm(semPm); DataSet dataSet = semIm.simulateData(sampleSize, false); print(semPm); GeneralizedSemPm _semPm = new GeneralizedSemPm(semPm); GeneralizedSemIm _semIm = new GeneralizedSemIm(_semPm, semIm); DataSet _dataSet = _semIm.simulateDataMinimizeSurface(sampleSize, false); print(_semPm); // System.out.println(_dataSet); for (int j = 0; j < dataSet.getNumColumns(); j++) { double[] col = dataSet.getDoubleData().getColumn(j).toArray(); double[] _col = _dataSet.getDoubleData().getColumn(j).toArray(); double mean = StatUtils.mean(col); double _mean = StatUtils.mean(_col); double variance = StatUtils.variance(col); double _variance = StatUtils.variance(_col); assertEquals(mean, _mean, 0.3); assertEquals(1.0, variance / _variance, .2); } }
/**
 * Scores node {@code y} against the candidate {@code parents} using whichever scoring method
 * the {@code score} field currently selects.
 *
 * <p>The moment-based options (kurtosis, skew, fifth moment) return the absolute value of the
 * corresponding statistic of {@code residual(y, parents)}; the other two options delegate to
 * dedicated helper methods.
 *
 * @param y the node being scored.
 * @param parents the candidate parents of {@code y}.
 * @return the score under the selected method.
 * @throws IllegalStateException if the selected score matches none of the handled options.
 */
private double score(Node y, List<Node> parents) {
  if (score == Score.andersonDarling) {
    return andersonDarlingPASquareStar(y, parents);
  }

  if (score == Score.kurtosis) {
    double kurtosis = StatUtils.kurtosis(residual(y, parents));
    return Math.abs(kurtosis);
  }

  if (score == Score.skew) {
    double skewness = StatUtils.skewness(residual(y, parents));
    return Math.abs(skewness);
  }

  if (score == Score.fifthMoment) {
    double fifthMoment = StatUtils.standardizedFifthMoment(residual(y, parents));
    return Math.abs(fifthMoment);
  }

  if (score == Score.absoluteValue) {
    return localScoreA(y, parents);
  }

  throw new IllegalStateException();
}
public boolean isIndependent(Node x, Node y, List<Node> z) { int[] all = new int[z.size() + 2]; all[0] = variablesMap.get(x); all[1] = variablesMap.get(y); for (int i = 0; i < z.size(); i++) { all[i + 2] = variablesMap.get(z.get(i)); } int sampleSize = data.get(0).rows(); List<Double> pValues = new ArrayList<Double>(); for (int m = 0; m < ncov.size(); m++) { TetradMatrix _ncov = ncov.get(m).getSelection(all, all); TetradMatrix inv = _ncov.inverse(); double r = -inv.get(0, 1) / sqrt(inv.get(0, 0) * inv.get(1, 1)); double fisherZ = sqrt(sampleSize - z.size() - 3.0) * 0.5 * (Math.log(1.0 + r) - Math.log(1.0 - r)); double pValue; if (Double.isInfinite(fisherZ)) { pValue = 0; } else { pValue = 2.0 * (1.0 - RandomUtil.getInstance().normalCdf(0, 1, abs(fisherZ))); } pValues.add(pValue); } double _cutoff = alpha; if (fdr) { _cutoff = StatUtils.fdrCutoff(alpha, pValues, false); } Collections.sort(pValues); int index = (int) round((1.0 - percent) * pValues.size()); this.pValue = pValues.get(index); // if (this.pValue == 0) { // System.out.println("Zero pvalue "+ SearchLogUtils.independenceFactMsg(x, y, z, // getPValue())); // } boolean independent = this.pValue > _cutoff; if (verbose) { if (independent) { TetradLogger.getInstance() .log("independencies", SearchLogUtils.independenceFactMsg(x, y, z, getPValue())); // System.out.println(SearchLogUtils.independenceFactMsg(x, y, z, getPValue())); } else { TetradLogger.getInstance() .log("dependencies", SearchLogUtils.dependenceFactMsg(x, y, z, getPValue())); } } return independent; }
/** * Determines whether variable x is independent of variable y given a list of conditioning * variables z. * * @param x the one variable being compared. * @param y the second variable being compared. * @param z the list of conditioning variables. * @return true iff x _||_ y | z. * @throws RuntimeException if a matrix singularity is encountered. */ public boolean isIndependent(Node x, Node y, List<Node> z) { TetradMatrix submatrix = subMatrix(x, y, z); double r = 0; try { r = StatUtils.partialCorrelation(submatrix); if (Double.isNaN((r)) || r < -1. || r > 1.) throw new RuntimeException(); } catch (Exception e) { DepthChoiceGenerator gen = new DepthChoiceGenerator(z.size(), z.size()); int[] choice; while ((choice = gen.next()) != null) { try { List<Node> z2 = new ArrayList<Node>(z); z2.removeAll(GraphUtils.asList(choice, z)); submatrix = subMatrix(x, y, z2); r = StatUtils.partialCorrelation(submatrix); } catch (Exception e2) { continue; } // if (Double.isNaN(r)) continue; // // if (r > 1.) r = 1.; // if (r < -1.) r = -1.; if (Double.isNaN(r) || r < -1. || r > 1.) continue; break; } } // Either dividing by a zero standard deviation (in which case it's dependent) or doing a // regression // (effectively) with a multicolliarity if (Double.isNaN(r)) { int[] _z = new int[z.size()]; // for (int i = 0; i < _z.length; i++) _z[i] = i + 2; // //// double varx = StatUtils.partialVariance(submatrix, 0, _z); // submatrix.get(0, // 0); //// double vary = StatUtils.partialVariance(submatrix, 1, _z); //submatrix.get(1, // 1); // // double varx = submatrix.get(0, 0); // double vary = submatrix.get(1, 1); // // if (varx * vary == 0) { return true; // } } if (r > 1.) r = 1.; if (r < -1.) r = -1.; this.fisherZ = Math.sqrt(sampleSize() - z.size() - 3.0) * 0.5 * (Math.log(1.0 + r) - Math.log(1.0 - r)); if (Double.isNaN(this.fisherZ)) { throw new IllegalArgumentException( "The Fisher's Z " + "score for independence fact " + x + " _||_ " + y + " | " + z + " is undefined. 
r = " + r); } boolean independent = getPValue() > alpha; if (independent) { TetradLogger.getInstance() .log("independencies", SearchLogUtils.independenceFactMsg(x, y, z, getPValue())); } else { TetradLogger.getInstance() .log("dependencies", SearchLogUtils.dependenceFactMsg(x, y, z, getPValue())); } return independent; }
@Test public void testHistogram() { RandomUtil.getInstance().setSeed(4829384L); List<Node> nodes = new ArrayList<Node>(); for (int i = 0; i < 5; i++) { nodes.add(new ContinuousVariable("X" + (i + 1))); } Dag trueGraph = new Dag(GraphUtils.randomGraph(nodes, 0, 5, 30, 15, 15, false)); int sampleSize = 1000; // Continuous SemPm semPm = new SemPm(trueGraph); SemIm semIm = new SemIm(semPm); DataSet data = semIm.simulateData(sampleSize, false); Histogram histogram = new Histogram(data); histogram.setTarget("X1"); histogram.setNumBins(20); assertEquals(3.76, histogram.getMax(), 0.01); assertEquals(-3.83, histogram.getMin(), 0.01); assertEquals(1000, histogram.getN()); histogram.setTarget("X1"); histogram.setNumBins(10); histogram.addConditioningVariable("X3", 0, 1); histogram.addConditioningVariable("X4", 0, 1); histogram.removeConditioningVariable("X3"); assertEquals(3.76, histogram.getMax(), 0.01); assertEquals(-3.83, histogram.getMin(), 0.01); assertEquals(188, histogram.getN()); double[] arr = histogram.getContinuousData("X2"); histogram.addConditioningVariable("X2", StatUtils.min(arr), StatUtils.mean(arr)); // Discrete BayesPm bayesPm = new BayesPm(trueGraph); BayesIm bayesIm = new MlBayesIm(bayesPm, MlBayesIm.RANDOM); DataSet data2 = bayesIm.simulateData(sampleSize, false); // For some reason these are giving different // values when all of the unit tests are run are // once. TODO They produce stable values when // this particular test is run repeatedly. Histogram histogram2 = new Histogram(data2); histogram2.setTarget("X1"); int[] frequencies1 = histogram2.getFrequencies(); // assertEquals(928, frequencies1[0]); // assertEquals(72, frequencies1[1]); histogram2.setTarget("X1"); histogram2.addConditioningVariable("X2", 0); histogram2.addConditioningVariable("X3", 1); int[] frequencies = histogram2.getFrequencies(); // assertEquals(377, frequencies[0]); // assertEquals(28, frequencies[1]); }
/**
 * Builds a cyclic generalized SEM with nonlinear node expressions and uniform error terms,
 * simulates 1000 cases with {@code simulateDataNSteps}, and checks the covariance of the first
 * two columns against a value pinned for the fixed random seed.
 *
 * <p>A failure to parse any of the node expressions fails the test. Previously the
 * ParseException was only printed, which let the test pass without ever reaching the covariance
 * assertion.
 */
@Test
public void test3() {
  RandomUtil.getInstance().setSeed(49293843L);

  List<Node> variableNodes = new ArrayList<>();

  ContinuousVariable x1 = new ContinuousVariable("X1");
  ContinuousVariable x2 = new ContinuousVariable("X2");
  ContinuousVariable x3 = new ContinuousVariable("X3");
  ContinuousVariable x4 = new ContinuousVariable("X4");
  ContinuousVariable x5 = new ContinuousVariable("X5");

  variableNodes.add(x1);
  variableNodes.add(x2);
  variableNodes.add(x3);
  variableNodes.add(x4);
  variableNodes.add(x5);

  Graph _graph = new EdgeListGraph(variableNodes);
  SemGraph graph = new SemGraph(_graph);

  // Error terms are switched on before the exogenous node of each variable is looked up.
  graph.setShowErrorTerms(true);

  Node e1 = graph.getExogenous(x1);
  Node e2 = graph.getExogenous(x2);
  Node e3 = graph.getExogenous(x3);
  Node e4 = graph.getExogenous(x4);
  Node e5 = graph.getExogenous(x5);

  graph.addDirectedEdge(x1, x3);
  graph.addDirectedEdge(x1, x2);
  graph.addDirectedEdge(x2, x3);
  graph.addDirectedEdge(x3, x4);
  graph.addDirectedEdge(x2, x4);
  graph.addDirectedEdge(x4, x5);
  graph.addDirectedEdge(x2, x5);
  // X5 -> X1 closes a cycle through X1, X2 (or X3, X4) and X5.
  graph.addDirectedEdge(x5, x1);

  GeneralizedSemPm pm = new GeneralizedSemPm(graph);

  List<Node> variablesNodes = pm.getVariableNodes();
  print(variablesNodes);

  List<Node> errorNodes = pm.getErrorNodes();
  print(errorNodes);

  try {
    pm.setNodeExpression(x1, "cos(b1) + a1 * X5 + E_X1");
    pm.setNodeExpression(x2, "a2 * X1 + E_X2");
    pm.setNodeExpression(x3, "tan(a3*X2 + a4*X1) + E_X3");
    pm.setNodeExpression(x4, "0.1 * E^X2 + X3 + E_X4");
    pm.setNodeExpression(x5, "0.1 * E^X4 + a6* X2 + E_X5");
    pm.setNodeExpression(e1, "U(0, 1)");
    pm.setNodeExpression(e2, "U(0, 1)");
    pm.setNodeExpression(e3, "U(0, 1)");
    pm.setNodeExpression(e4, "U(0, 1)");
    pm.setNodeExpression(e5, "U(0, 1)");

    GeneralizedSemIm im = new GeneralizedSemIm(pm);

    print(im);

    DataSet dataSet = im.simulateDataNSteps(1000, false);

    double[] d1 = dataSet.getDoubleData().getColumn(0).toArray();
    double[] d2 = dataSet.getDoubleData().getColumn(1).toArray();

    double cov = StatUtils.covariance(d1, d2);

    assertEquals(-0.002, cov, 0.001);
  } catch (ParseException e) {
    // Surface the parse failure as a test failure, keeping the original exception as the cause.
    throw new AssertionError("Failed to parse a node expression: " + e.getMessage(), e);
  }
}