private void setDataSet(DataSet dataSet) { List<String> _varNames = dataSet.getVariableNames(); this.variables = dataSet.getVariables(); this.dataSet = dataSet; this.discrete = dataSet.isDiscrete(); if (!isDiscrete()) { this.covariances = new CovarianceMatrix(dataSet); } this.sampleSize = dataSet.getNumRows(); }
public GesConcurrent(DataSet dataSet) { setDataSet(dataSet); if (dataSet.isDiscrete()) { BDeuScore score = new BDeuScore(dataSet); score.setSamplePrior(10); score.setStructurePrior(0.001); } setStructurePrior(0.001); setSamplePrior(10.); }
public void rtest4() { System.out.println("SHD\tP"); // System.out.println("MB1\tMB2\tMB3\tMB4\tMB5\tMB6"); Graph mim = DataGraphUtils.randomSingleFactorModel(5, 5, 8, 0, 0, 0); Graph mimStructure = structure(mim); SemPm pm = new SemPm(mim); SemImInitializationParams params = new SemImInitializationParams(); params.setCoefRange(0.5, 1.5); NumberFormat nf = new DecimalFormat("0.0000"); int totalError = 0; int errorCount = 0; int maxScore = 0; int maxNumMeasures = 0; double maxP = 0.0; for (int r = 0; r < 1; r++) { SemIm im = new SemIm(pm, params); DataSet data = im.simulateData(1000, false); mim = GraphUtils.replaceNodes(mim, data.getVariables()); List<List<Node>> trueClusters = MimUtils.convertToClusters2(mim); CovarianceMatrix _cov = new CovarianceMatrix(data); ICovarianceMatrix cov = DataUtils.reorderColumns(_cov); String algorithm = "FOFC"; Graph searchGraph; List<List<Node>> partition; if (algorithm.equals("FOFC")) { FindOneFactorClusters fofc = new FindOneFactorClusters(cov, TestType.TETRAD_WISHART, 0.001f); searchGraph = fofc.search(); searchGraph = GraphUtils.replaceNodes(searchGraph, data.getVariables()); partition = MimUtils.convertToClusters2(searchGraph); } else if (algorithm.equals("BPC")) { TestType testType = TestType.TETRAD_WISHART; TestType purifyType = TestType.TETRAD_BASED2; BuildPureClusters bpc = new BuildPureClusters(data, 0.001, testType, purifyType); searchGraph = bpc.search(); partition = MimUtils.convertToClusters2(searchGraph); } else { throw new IllegalStateException(); } mimStructure = GraphUtils.replaceNodes(mimStructure, data.getVariables()); List<String> latentVarList = reidentifyVariables(mim, data, partition, 2); Graph mimbuildStructure; Mimbuild2 mimbuild = new Mimbuild2(); mimbuild.setAlpha(0.001); mimbuild.setMinClusterSize(3); try { mimbuildStructure = mimbuild.search(partition, latentVarList, cov); } catch (Exception e) { e.printStackTrace(); continue; } mimbuildStructure = GraphUtils.replaceNodes(mimbuildStructure, data.getVariables()); mimbuildStructure = condense(mimStructure, mimbuildStructure); // Graph mimSubgraph = new EdgeListGraph(mimStructure); // // for (Node node : mimSubgraph.getNodes()) { // if (!mimStructure.getNodes().contains(node)) { // mimSubgraph.removeNode(node); // } // } int shd = SearchGraphUtils.structuralHammingDistance(mimStructure, mimbuildStructure); boolean impureCluster = containsImpureCluster(partition, trueClusters); double pValue = mimbuild.getpValue(); boolean pBelow05 = pValue < 0.05; boolean numClustersGreaterThan5 = partition.size() != 5; boolean error = false; // boolean condition = impureCluster || numClustersGreaterThan5 || pBelow05; // boolean condition = numClustersGreaterThan5 || pBelow05; boolean condition = numClustered(partition) == 40; if (!condition && (shd > 5)) { error = true; } if (!condition) { totalError += shd; errorCount++; } // if (numClustered(partition) > maxNumMeasures) { // maxNumMeasures = numClustered(partition); // maxP = pValue; // maxScore = shd; // System.out.println("maxNumMeasures = " + maxNumMeasures); // System.out.println("maxScore = " + maxScore); // System.out.println("maxP = " + maxP); // System.out.println("clusters = " + clusterSizes(partition, trueClusters)); // } // else if (pValue > maxP) { maxScore = shd; maxP = mimbuild.getpValue(); maxNumMeasures = numClustered(partition); System.out.println("maxNumMeasures = " + maxNumMeasures); System.out.println("maxScore = " + maxScore); System.out.println("maxP = " + maxP); System.out.println("clusters = " + clusterSizes(partition, trueClusters)); } System.out.print( shd + "\t" + nf.format(pValue) + " " // + (error ? 1 : 0) + " " // + (pBelow05 ? "P < 0.05 " : "") // + (impureCluster ? "Impure cluster " : "") // + (numClustersGreaterThan5 ? "# Clusters != 5 " : "") // + clusterSizes(partition, trueClusters) + numClustered(partition)); System.out.println(); } System.out.println("\nAvg SHD for not-flagged cases = " + (totalError / (double) errorCount)); System.out.println("maxNumMeasures = " + maxNumMeasures); System.out.println("maxScore = " + maxScore); System.out.println("maxP = " + maxP); }
public DataSet simulateDataCholesky( int sampleSize, TetradMatrix covar, List<Node> variableNodes) { List<Node> variables = new LinkedList<Node>(); for (Node node : variableNodes) { variables.add(node); } List<Node> newVariables = new ArrayList<Node>(); for (Node node : variables) { ContinuousVariable continuousVariable = new ContinuousVariable(node.getName()); continuousVariable.setNodeType(node.getNodeType()); newVariables.add(continuousVariable); } TetradMatrix impliedCovar = covar; DataSet fullDataSet = new ColtDataSet(sampleSize, newVariables); TetradMatrix cholesky = MatrixUtils.choleskyC(impliedCovar); // Simulate the data by repeatedly calling the Cholesky.exogenousData // method. Store only the data for the measured variables. ROW: for (int row = 0; row < sampleSize; row++) { // Step 1. Generate normal samples. double exoData[] = new double[cholesky.rows()]; for (int i = 0; i < exoData.length; i++) { exoData[i] = RandomUtil.getInstance().nextNormal(0, 1); // exoData[i] = randomUtil.nextUniform(-1, 1); } // Step 2. Multiply by cholesky to get correct covariance. double point[] = new double[exoData.length]; for (int i = 0; i < exoData.length; i++) { double sum = 0.0; for (int j = 0; j <= i; j++) { sum += cholesky.get(i, j) * exoData[j]; } point[i] = sum; } double rowData[] = point; for (int col = 0; col < variables.size(); col++) { int index = variableNodes.indexOf(variables.get(col)); double value = rowData[index]; if (Double.isNaN(value) || Double.isInfinite(value)) { throw new IllegalArgumentException("Value out of range: " + value); } fullDataSet.setDouble(row, col, value); } } return DataUtils.restrictToMeasured(fullDataSet); }