public void rtest3() { Node x = new GraphNode("X"); Node y = new GraphNode("Y"); Node z = new GraphNode("Z"); Node w = new GraphNode("W"); List<Node> nodes = new ArrayList<Node>(); nodes.add(x); nodes.add(y); nodes.add(z); nodes.add(w); Graph g = new EdgeListGraph(nodes); g.addDirectedEdge(x, y); g.addDirectedEdge(x, z); g.addDirectedEdge(y, w); g.addDirectedEdge(z, w); Graph maxGraph = null; double maxPValue = -1.0; ICovarianceMatrix maxLatentCov = null; Graph mim = DataGraphUtils.randomMim(g, 8, 0, 0, 0, true); // Graph mim = DataGraphUtils.randomSingleFactorModel(5, 5, 8, 0, 0, 0); Graph mimStructure = structure(mim); SemPm pm = new SemPm(mim); System.out.println("\n\nTrue graph:"); System.out.println(mimStructure); SemImInitializationParams params = new SemImInitializationParams(); params.setCoefRange(0.5, 1.5); SemIm im = new SemIm(pm, params); int N = 1000; DataSet data = im.simulateData(N, false); CovarianceMatrix cov = new CovarianceMatrix(data); for (int i = 0; i < 1; i++) { ICovarianceMatrix _cov = DataUtils.reorderColumns(cov); List<List<Node>> partition; FindOneFactorClusters fofc = new FindOneFactorClusters(_cov, TestType.TETRAD_WISHART, .001); fofc.search(); partition = fofc.getClusters(); System.out.println(partition); List<String> latentVarList = reidentifyVariables(mim, data, partition, 2); Mimbuild2 mimbuild = new Mimbuild2(); mimbuild.setAlpha(0.001); // mimbuild.setMinimumSize(5); // To test knowledge. // Knowledge knowledge = new Knowledge2(); // knowledge.setEdgeForbidden("L.Y", "L.W", true); // knowledge.setEdgeRequired("L.Y", "L.Z", true); // mimbuild.setKnowledge(knowledge); Graph mimbuildStructure = mimbuild.search(partition, latentVarList, _cov); double pValue = mimbuild.getpValue(); System.out.println(mimbuildStructure); System.out.println("P = " + pValue); System.out.println("Latent Cov = " + mimbuild.getLatentsCov()); if (pValue > maxPValue) { maxPValue = pValue; maxGraph = new EdgeListGraph(mimbuildStructure); maxLatentCov = mimbuild.getLatentsCov(); } } System.out.println("\n\nTrue graph:"); System.out.println(mimStructure); System.out.println("\nBest graph:"); System.out.println(maxGraph); System.out.println("P = " + maxPValue); System.out.println("Latent Cov = " + maxLatentCov); System.out.println(); }
public void rtest4() { System.out.println("SHD\tP"); // System.out.println("MB1\tMB2\tMB3\tMB4\tMB5\tMB6"); Graph mim = DataGraphUtils.randomSingleFactorModel(5, 5, 8, 0, 0, 0); Graph mimStructure = structure(mim); SemPm pm = new SemPm(mim); SemImInitializationParams params = new SemImInitializationParams(); params.setCoefRange(0.5, 1.5); NumberFormat nf = new DecimalFormat("0.0000"); int totalError = 0; int errorCount = 0; int maxScore = 0; int maxNumMeasures = 0; double maxP = 0.0; for (int r = 0; r < 1; r++) { SemIm im = new SemIm(pm, params); DataSet data = im.simulateData(1000, false); mim = GraphUtils.replaceNodes(mim, data.getVariables()); List<List<Node>> trueClusters = MimUtils.convertToClusters2(mim); CovarianceMatrix _cov = new CovarianceMatrix(data); ICovarianceMatrix cov = DataUtils.reorderColumns(_cov); String algorithm = "FOFC"; Graph searchGraph; List<List<Node>> partition; if (algorithm.equals("FOFC")) { FindOneFactorClusters fofc = new FindOneFactorClusters(cov, TestType.TETRAD_WISHART, 0.001f); searchGraph = fofc.search(); searchGraph = GraphUtils.replaceNodes(searchGraph, data.getVariables()); partition = MimUtils.convertToClusters2(searchGraph); } else if (algorithm.equals("BPC")) { TestType testType = TestType.TETRAD_WISHART; TestType purifyType = TestType.TETRAD_BASED2; BuildPureClusters bpc = new BuildPureClusters(data, 0.001, testType, purifyType); searchGraph = bpc.search(); partition = MimUtils.convertToClusters2(searchGraph); } else { throw new IllegalStateException(); } mimStructure = GraphUtils.replaceNodes(mimStructure, data.getVariables()); List<String> latentVarList = reidentifyVariables(mim, data, partition, 2); Graph mimbuildStructure; Mimbuild2 mimbuild = new Mimbuild2(); mimbuild.setAlpha(0.001); mimbuild.setMinClusterSize(3); try { mimbuildStructure = mimbuild.search(partition, latentVarList, cov); } catch (Exception e) { e.printStackTrace(); continue; } mimbuildStructure = GraphUtils.replaceNodes(mimbuildStructure, data.getVariables()); mimbuildStructure = condense(mimStructure, mimbuildStructure); // Graph mimSubgraph = new EdgeListGraph(mimStructure); // // for (Node node : mimSubgraph.getNodes()) { // if (!mimStructure.getNodes().contains(node)) { // mimSubgraph.removeNode(node); // } // } int shd = SearchGraphUtils.structuralHammingDistance(mimStructure, mimbuildStructure); boolean impureCluster = containsImpureCluster(partition, trueClusters); double pValue = mimbuild.getpValue(); boolean pBelow05 = pValue < 0.05; boolean numClustersGreaterThan5 = partition.size() != 5; boolean error = false; // boolean condition = impureCluster || numClustersGreaterThan5 || pBelow05; // boolean condition = numClustersGreaterThan5 || pBelow05; boolean condition = numClustered(partition) == 40; if (!condition && (shd > 5)) { error = true; } if (!condition) { totalError += shd; errorCount++; } // if (numClustered(partition) > maxNumMeasures) { // maxNumMeasures = numClustered(partition); // maxP = pValue; // maxScore = shd; // System.out.println("maxNumMeasures = " + maxNumMeasures); // System.out.println("maxScore = " + maxScore); // System.out.println("maxP = " + maxP); // System.out.println("clusters = " + clusterSizes(partition, trueClusters)); // } // else if (pValue > maxP) { maxScore = shd; maxP = mimbuild.getpValue(); maxNumMeasures = numClustered(partition); System.out.println("maxNumMeasures = " + maxNumMeasures); System.out.println("maxScore = " + maxScore); System.out.println("maxP = " + maxP); System.out.println("clusters = " + clusterSizes(partition, trueClusters)); } System.out.print( shd + "\t" + nf.format(pValue) + " " // + (error ? 1 : 0) + " " // + (pBelow05 ? "P < 0.05 " : "") // + (impureCluster ? "Impure cluster " : "") // + (numClustersGreaterThan5 ? "# Clusters != 5 " : "") // + clusterSizes(partition, trueClusters) + numClustered(partition)); System.out.println(); } System.out.println("\nAvg SHD for not-flagged cases = " + (totalError / (double) errorCount)); System.out.println("maxNumMeasures = " + maxNumMeasures); System.out.println("maxScore = " + maxScore); System.out.println("maxP = " + maxP); }
public DataSet simulateDataCholesky( int sampleSize, TetradMatrix covar, List<Node> variableNodes) { List<Node> variables = new LinkedList<Node>(); for (Node node : variableNodes) { variables.add(node); } List<Node> newVariables = new ArrayList<Node>(); for (Node node : variables) { ContinuousVariable continuousVariable = new ContinuousVariable(node.getName()); continuousVariable.setNodeType(node.getNodeType()); newVariables.add(continuousVariable); } TetradMatrix impliedCovar = covar; DataSet fullDataSet = new ColtDataSet(sampleSize, newVariables); TetradMatrix cholesky = MatrixUtils.choleskyC(impliedCovar); // Simulate the data by repeatedly calling the Cholesky.exogenousData // method. Store only the data for the measured variables. ROW: for (int row = 0; row < sampleSize; row++) { // Step 1. Generate normal samples. double exoData[] = new double[cholesky.rows()]; for (int i = 0; i < exoData.length; i++) { exoData[i] = RandomUtil.getInstance().nextNormal(0, 1); // exoData[i] = randomUtil.nextUniform(-1, 1); } // Step 2. Multiply by cholesky to get correct covariance. double point[] = new double[exoData.length]; for (int i = 0; i < exoData.length; i++) { double sum = 0.0; for (int j = 0; j <= i; j++) { sum += cholesky.get(i, j) * exoData[j]; } point[i] = sum; } double rowData[] = point; for (int col = 0; col < variables.size(); col++) { int index = variableNodes.indexOf(variables.get(col)); double value = rowData[index]; if (Double.isNaN(value) || Double.isInfinite(value)) { throw new IllegalArgumentException("Value out of range: " + value); } fullDataSet.setDouble(row, col, value); } } return DataUtils.restrictToMeasured(fullDataSet); }