/** * Calculates the sample likelihood and BIC score for i given its parents in a simple SEM model. */ private double localSemScore(int i, int[] parents) { try { ICovarianceMatrix cov = getCovMatrix(); double varianceY = cov.getValue(i, i); double residualVariance = varianceY; int n = sampleSize(); int p = parents.length; int k = (p * (p + 1)) / 2 + p; // int k = (p + 1) * (p + 1); // int k = p + 1; TetradMatrix covxx = cov.getSelection(parents, parents); TetradMatrix covxxInv = covxx.inverse(); TetradVector covxy = cov.getSelection(parents, new int[] {i}).getColumn(0); TetradVector b = covxxInv.times(covxy); residualVariance -= covxy.dotProduct(b); if (residualVariance <= 0 && verbose) { out.println( "Nonpositive residual varianceY: resVar / varianceY = " + (residualVariance / varianceY)); return Double.NaN; } double c = getPenaltyDiscount(); // return -n * log(residualVariance) - 2 * k; //AIC return -n * Math.log(residualVariance) - c * k * Math.log(n); // return -n * log(residualVariance) - c * k * (log(n) - log(2 * PI)); } catch (Exception e) { e.printStackTrace(); throw new RuntimeException(e); // throwMinimalLinearDependentSet(parents, cov); } }
private double score(Node y, List<Node> parents) { if (score == Score.andersonDarling) { return andersonDarlingPASquareStar(y, parents); } else if (score == Score.kurtosis) { return Math.abs(StatUtils.kurtosis(residual(y, parents))); } else if (score == Score.skew) { return Math.abs(StatUtils.skewness(residual(y, parents))); } else if (score == Score.fifthMoment) { return Math.abs(StatUtils.standardizedFifthMoment(residual(y, parents))); } else if (score == Score.absoluteValue) { return localScoreA(y, parents); } throw new IllegalStateException(); }
public static List<Set<Node>> powerSet(List<Node> nodes) { List<Set<Node>> subsets = new ArrayList<Set<Node>>(); int total = (int) Math.pow(2, nodes.size()); for (int i = 0; i < total; i++) { Set<Node> newSet = new HashSet<Node>(); String selection = Integer.toBinaryString(i); for (int j = selection.length() - 1; j >= 0; j--) { if (selection.charAt(j) == '1') { newSet.add(nodes.get(selection.length() - j - 1)); } } subsets.add(newSet); } return subsets; }
public static boolean existsLocalSepsetWithoutDet( Node x, Node y, Node z, IndependenceTest test, Graph graph, int depth) { Set<Node> __nodes = new HashSet<Node>(graph.getAdjacentNodes(x)); __nodes.addAll(graph.getAdjacentNodes(z)); __nodes.remove(x); __nodes.remove(z); List<Node> _nodes = new LinkedList<Node>(__nodes); TetradLogger.getInstance() .log("adjacencies", "Adjacents for " + x + "--" + y + "--" + z + " = " + _nodes); int _depth = depth; if (_depth == -1) { _depth = 1000; } _depth = Math.min(_depth, _nodes.size()); for (int d = 0; d <= _depth; d++) { if (_nodes.size() >= d) { ChoiceGenerator cg2 = new ChoiceGenerator(_nodes.size(), d); int[] choice; while ((choice = cg2.next()) != null) { List<Node> condSet = asList(choice, _nodes); if (condSet.contains(y)) { continue; } if (test.determines(condSet, y)) { continue; } // LogUtils.getInstance().finest("Trying " + condSet); if (test.isIndependent(x, z, condSet)) { return true; } } } } return false; }
public static CpcTripleType getCpcTripleType( Node x, Node y, Node z, IndependenceTest test, int depth, Graph graph) { // System.out.println("getCpcTripleType 1"); boolean existsSepsetContainingY = false; boolean existsSepsetNotContainingY = false; Set<Node> __nodes = new HashSet<Node>(graph.getAdjacentNodes(x)); __nodes.remove(z); // System.out.println("getCpcTripleType 2"); List<Node> _nodes = new LinkedList<Node>(__nodes); TetradLogger.getInstance() .log("adjacencies", "Adjacents for " + x + "--" + y + "--" + z + " = " + _nodes); // System.out.println("getCpcTripleType 3"); int _depth = depth; if (_depth == -1) { _depth = 1000; } _depth = Math.min(_depth, _nodes.size()); // System.out.println("getCpcTripleType 4"); for (int d = 0; d <= _depth; d++) { // System.out.println("getCpcTripleType 5"); ChoiceGenerator cg = new ChoiceGenerator(_nodes.size(), d); int[] choice; while ((choice = cg.next()) != null) { // System.out.println("getCpcTripleType 6"); List<Node> condSet = GraphUtils.asList(choice, _nodes); // System.out.println("getCpcTripleType 7"); if (test.isIndependent(x, z, condSet)) { if (condSet.contains(y)) { existsSepsetContainingY = true; } else { existsSepsetNotContainingY = true; } } } } // System.out.println("getCpcTripleType 8"); __nodes = new HashSet<Node>(graph.getAdjacentNodes(z)); __nodes.remove(x); _nodes = new LinkedList<Node>(__nodes); TetradLogger.getInstance() .log("adjacencies", "Adjacents for " + x + "--" + y + "--" + z + " = " + _nodes); // System.out.println("getCpcTripleType 9"); _depth = depth; if (_depth == -1) { _depth = 1000; } _depth = Math.min(_depth, _nodes.size()); // System.out.println("getCpcTripleType 10"); for (int d = 0; d <= _depth; d++) { // System.out.println("getCpcTripleType 11"); ChoiceGenerator cg = new ChoiceGenerator(_nodes.size(), d); int[] choice; while ((choice = cg.next()) != null) { List<Node> condSet = GraphUtils.asList(choice, _nodes); if (test.isIndependent(x, z, condSet)) { if (condSet.contains(y)) { existsSepsetContainingY = true; } else { existsSepsetNotContainingY = true; } } } } // System.out.println("getCpcTripleType 12"); if (existsSepsetContainingY == existsSepsetNotContainingY) { return CpcTripleType.AMBIGUOUS; } else if (!existsSepsetNotContainingY) { return CpcTripleType.NONCOLLIDER; } else { return CpcTripleType.COLLIDER; } }
private double localScoreB(Node node, List<Node> parents) { double score = 0.0; double maxScore = Double.NEGATIVE_INFINITY; Node _target = node; List<Node> _regressors = parents; Node target = getVariable(variables, _target.getName()); List<Node> regressors = new ArrayList<Node>(); for (Node _regressor : _regressors) { Node variable = getVariable(variables, _regressor.getName()); regressors.add(variable); } DATASET: for (int m = 0; m < dataSets.size(); m++) { RegressionResult result = regressions.get(m).regress(target, regressors); TetradVector residualsSingleDataset = result.getResiduals(); DoubleArrayList _residualsSingleDataset = new DoubleArrayList(residualsSingleDataset.toArray()); for (int h = 0; h < residualsSingleDataset.size(); h++) { if (Double.isNaN(residualsSingleDataset.get(h))) { continue DATASET; } } double mean = Descriptive.mean(_residualsSingleDataset); double std = Descriptive.standardDeviation( Descriptive.variance( _residualsSingleDataset.size(), Descriptive.sum(_residualsSingleDataset), Descriptive.sumOfSquares(_residualsSingleDataset))); for (int i2 = 0; i2 < _residualsSingleDataset.size(); i2++) { _residualsSingleDataset.set(i2, (_residualsSingleDataset.get(i2) - mean) / std); } double[] _f = new double[_residualsSingleDataset.size()]; for (int k = 0; k < _residualsSingleDataset.size(); k++) { _f[k] = _residualsSingleDataset.get(k); } DoubleArrayList f = new DoubleArrayList(_f); for (int k = 0; k < f.size(); k++) { f.set(k, Math.abs(f.get(k))); } double _mean = Descriptive.mean(f); double diff = _mean - Math.sqrt(2.0 / Math.PI); score += diff * diff; if (score > maxScore) { maxScore = score; } } double avg = score / dataSets.size(); return avg; }
/** * Constructs a new standardized SEM IM from the freeParameters in the given SEM IM. * * @param im Stop asking me for these things! The given SEM IM!!! * @param initialization CALCULATE_FROM_SEM if the initial values will be calculated from the * given SEM IM; INITIALIZE_FROM_DATA if data will be simulated from the given SEM, * standardized, and estimated. */ public StandardizedSemIm(SemIm im, Initialization initialization) { this.semPm = new SemPm(im.getSemPm()); this.semGraph = new SemGraph(semPm.getGraph()); semGraph.setShowErrorTerms(true); if (semGraph.existsDirectedCycle()) { throw new IllegalArgumentException("The cyclic case is not handled."); } if (initialization == Initialization.CALCULATE_FROM_SEM) { // This code calculates the new coefficients directly from the old ones. edgeParameters = new HashMap<Edge, Double>(); List<Node> nodes = im.getVariableNodes(); TetradMatrix impliedCovar = im.getImplCovar(true); for (Parameter parameter : im.getSemPm().getParameters()) { if (parameter.getType() == ParamType.COEF) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); int aindex = nodes.indexOf(a); int bindex = nodes.indexOf(b); double vara = impliedCovar.get(aindex, aindex); double stda = Math.sqrt(vara); double varb = impliedCovar.get(bindex, bindex); double stdb = Math.sqrt(varb); double oldCoef = im.getEdgeCoef(a, b); double newCoef = (stda / stdb) * oldCoef; edgeParameters.put(Edges.directedEdge(a, b), newCoef); } else if (parameter.getType() == ParamType.COVAR) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); Node exoa = semGraph.getExogenous(a); Node exob = semGraph.getExogenous(b); double covar = im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b)); edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar); } } } else { // This code estimates the new coefficients from simulated data from the old model. DataSet dataSet = im.simulateData(1000, false); TetradMatrix _dataSet = dataSet.getDoubleData(); _dataSet = DataUtils.standardizeData(_dataSet); DataSet dataSetStandardized = ColtDataSet.makeData(dataSet.getVariables(), _dataSet); SemEstimator estimator = new SemEstimator(dataSetStandardized, im.getSemPm()); SemIm imStandardized = estimator.estimate(); edgeParameters = new HashMap<Edge, Double>(); for (Parameter parameter : imStandardized.getSemPm().getParameters()) { if (parameter.getType() == ParamType.COEF) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); double coef = imStandardized.getEdgeCoef(a, b); edgeParameters.put(Edges.directedEdge(a, b), coef); } else if (parameter.getType() == ParamType.COVAR) { Node a = parameter.getNodeA(); Node b = parameter.getNodeB(); Node exoa = semGraph.getExogenous(a); Node exob = semGraph.getExogenous(b); double covar = -im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b)); edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar); } } } this.measuredNodes = Collections.unmodifiableList(semPm.getMeasuredNodes()); }