// Cannot be done if the graph changes. public void setInitialGraph(Graph initialGraph) { initialGraph = GraphUtils.replaceNodes(initialGraph, variables); out.println("Initial graph variables: " + initialGraph.getNodes()); out.println("Data set variables: " + variables); if (!new HashSet<Node>(initialGraph.getNodes()).equals(new HashSet<Node>(variables))) { throw new IllegalArgumentException("Variables aren't the same."); } this.initialGraph = initialGraph; }
/**
 * Estimates, by counting rows of the data set, the probability of an assertion given a
 * condition.
 *
 * <p>Each of the {@code numRows} rows is read across {@code dims.length} columns. A row holding
 * {@code DiscreteVariable.MISSING_VALUE} in any column is skipped, and
 * {@code missingValueCaseFound} is set so that the skip can be detected after the call. The flag
 * is reset to {@code false} at the start of every call. Among the remaining rows, the method
 * counts those permitted by {@code condition} and, of those, the ones also permitted by
 * {@code assertion}.
 *
 * @param assertion the proposition whose conditional probability is estimated
 * @param condition the proposition conditioned on; must have the same variable source object as
 *        {@code assertion}
 * @return the number of complete rows satisfying both propositions divided by the number
 *         satisfying {@code condition}; {@code NaN} when no complete row satisfies
 *         {@code condition}
 * @throws IllegalArgumentException if the two propositions have different variable sources, or
 *         if the assertion's variables (after {@code GraphUtils.replaceNodes}) and the data
 *         set's variables are not equal as sets
 */
public double getConditionalProb(Proposition assertion, Proposition condition) {
    if (assertion.getVariableSource() != condition.getVariableSource()) {
        throw new IllegalArgumentException(
                "Assertion and condition must be " + "for the same Bayes IM.");
    }

    List<Node> assertionVars = assertion.getVariableSource().getVariables();
    List<Node> dataVars = dataSet.getVariables();

    assertionVars = GraphUtils.replaceNodes(assertionVars, dataVars);

    // NOTE(review): this is a set comparison, so it does not actually detect a different
    // ordering even though the message below mentions one.
    if (!new HashSet<Node>(assertionVars).equals(new HashSet<Node>(dataVars))) {
        throw new IllegalArgumentException(
                "Assertion variable and data variables"
                        + " are either different or in a different order: "
                        + "\n\tAssertion vars: " + assertionVars
                        + "\n\tData vars: " + dataVars);
    }

    int[] point = new int[dims.length];
    int count1 = 0; // complete rows permitted by the condition
    int count2 = 0; // ... of which also permitted by the assertion
    this.missingValueCaseFound = false;

    rows:
    for (int i = 0; i < numRows; i++) {
        for (int j = 0; j < dims.length; j++) {
            point[j] = dataSet.getInt(i, j);

            if (point[j] == DiscreteVariable.MISSING_VALUE) {
                // Record that a row was dropped; previously the flag was reset above but
                // never raised, so it could not report skipped rows.
                this.missingValueCaseFound = true;
                continue rows;
            }
        }

        if (condition.isPermissibleCombination(point)) {
            count1++;

            if (assertion.isPermissibleCombination(point)) {
                count2++;
            }
        }
    }

    // Floating-point division: 0 / 0.0 evaluates to NaN rather than throwing.
    return count2 / (double) count1;
}
public Graph search(List<Node> nodes) { long startTime = System.currentTimeMillis(); localScoreCache.clear(); if (!dataSet().getVariables().containsAll(nodes)) { throw new IllegalArgumentException("All of the nodes must be in " + "the supplied data set."); } Graph graph; if (initialGraph == null) { graph = new EdgeListGraphSingleConnections(nodes); } else { initialGraph = GraphUtils.replaceNodes(initialGraph, variables); graph = new EdgeListGraphSingleConnections(initialGraph); } topGraphs.clear(); buildIndexing(graph); addRequiredEdges(graph); score = 0.0; // Do forward search. fes(graph, nodes); // Do backward search. bes(graph); long endTime = System.currentTimeMillis(); this.elapsedTime = endTime - startTime; this.logger.log("graph", "\nReturning this graph: " + graph); this.logger.log("info", "Elapsed time = " + (elapsedTime) / 1000. + " s"); this.logger.flush(); return graph; }
public void rtest4() { System.out.println("SHD\tP"); // System.out.println("MB1\tMB2\tMB3\tMB4\tMB5\tMB6"); Graph mim = DataGraphUtils.randomSingleFactorModel(5, 5, 8, 0, 0, 0); Graph mimStructure = structure(mim); SemPm pm = new SemPm(mim); SemImInitializationParams params = new SemImInitializationParams(); params.setCoefRange(0.5, 1.5); NumberFormat nf = new DecimalFormat("0.0000"); int totalError = 0; int errorCount = 0; int maxScore = 0; int maxNumMeasures = 0; double maxP = 0.0; for (int r = 0; r < 1; r++) { SemIm im = new SemIm(pm, params); DataSet data = im.simulateData(1000, false); mim = GraphUtils.replaceNodes(mim, data.getVariables()); List<List<Node>> trueClusters = MimUtils.convertToClusters2(mim); CovarianceMatrix _cov = new CovarianceMatrix(data); ICovarianceMatrix cov = DataUtils.reorderColumns(_cov); String algorithm = "FOFC"; Graph searchGraph; List<List<Node>> partition; if (algorithm.equals("FOFC")) { FindOneFactorClusters fofc = new FindOneFactorClusters(cov, TestType.TETRAD_WISHART, 0.001f); searchGraph = fofc.search(); searchGraph = GraphUtils.replaceNodes(searchGraph, data.getVariables()); partition = MimUtils.convertToClusters2(searchGraph); } else if (algorithm.equals("BPC")) { TestType testType = TestType.TETRAD_WISHART; TestType purifyType = TestType.TETRAD_BASED2; BuildPureClusters bpc = new BuildPureClusters(data, 0.001, testType, purifyType); searchGraph = bpc.search(); partition = MimUtils.convertToClusters2(searchGraph); } else { throw new IllegalStateException(); } mimStructure = GraphUtils.replaceNodes(mimStructure, data.getVariables()); List<String> latentVarList = reidentifyVariables(mim, data, partition, 2); Graph mimbuildStructure; Mimbuild2 mimbuild = new Mimbuild2(); mimbuild.setAlpha(0.001); mimbuild.setMinClusterSize(3); try { mimbuildStructure = mimbuild.search(partition, latentVarList, cov); } catch (Exception e) { e.printStackTrace(); continue; } mimbuildStructure = GraphUtils.replaceNodes(mimbuildStructure, 
data.getVariables()); mimbuildStructure = condense(mimStructure, mimbuildStructure); // Graph mimSubgraph = new EdgeListGraph(mimStructure); // // for (Node node : mimSubgraph.getNodes()) { // if (!mimStructure.getNodes().contains(node)) { // mimSubgraph.removeNode(node); // } // } int shd = SearchGraphUtils.structuralHammingDistance(mimStructure, mimbuildStructure); boolean impureCluster = containsImpureCluster(partition, trueClusters); double pValue = mimbuild.getpValue(); boolean pBelow05 = pValue < 0.05; boolean numClustersGreaterThan5 = partition.size() != 5; boolean error = false; // boolean condition = impureCluster || numClustersGreaterThan5 || pBelow05; // boolean condition = numClustersGreaterThan5 || pBelow05; boolean condition = numClustered(partition) == 40; if (!condition && (shd > 5)) { error = true; } if (!condition) { totalError += shd; errorCount++; } // if (numClustered(partition) > maxNumMeasures) { // maxNumMeasures = numClustered(partition); // maxP = pValue; // maxScore = shd; // System.out.println("maxNumMeasures = " + maxNumMeasures); // System.out.println("maxScore = " + maxScore); // System.out.println("maxP = " + maxP); // System.out.println("clusters = " + clusterSizes(partition, trueClusters)); // } // else if (pValue > maxP) { maxScore = shd; maxP = mimbuild.getpValue(); maxNumMeasures = numClustered(partition); System.out.println("maxNumMeasures = " + maxNumMeasures); System.out.println("maxScore = " + maxScore); System.out.println("maxP = " + maxP); System.out.println("clusters = " + clusterSizes(partition, trueClusters)); } System.out.print( shd + "\t" + nf.format(pValue) + " " // + (error ? 1 : 0) + " " // + (pBelow05 ? "P < 0.05 " : "") // + (impureCluster ? "Impure cluster " : "") // + (numClustersGreaterThan5 ? 
"# Clusters != 5 " : "") // + clusterSizes(partition, trueClusters) + numClustered(partition)); System.out.println(); } System.out.println("\nAvg SHD for not-flagged cases = " + (totalError / (double) errorCount)); System.out.println("maxNumMeasures = " + maxNumMeasures); System.out.println("maxScore = " + maxScore); System.out.println("maxP = " + maxP); }
/** * Executes the algorithm, producing (at least) a result workbench. Must be implemented in the * extending class. */ public void execute() { Object source = dataWrapper.getSelectedDataModel(); DataModel dataModel = (DataModel) source; IKnowledge knowledge = params2.getKnowledge(); if (initialGraph == null) { initialGraph = new EdgeListGraph(dataModel.getVariables()); } Graph graph2 = new EdgeListGraph(initialGraph); graph2 = GraphUtils.replaceNodes(graph2, dataModel.getVariables()); Bff search; if (dataModel instanceof DataSet) { DataSet dataSet = (DataSet) dataModel; if (getAlgorithmType() == AlgorithmType.BEAM) { search = new BffBeam(graph2, dataSet, knowledge); } else if (getAlgorithmType() == AlgorithmType.GES) { search = new BffGes(graph2, dataSet); search.setKnowledge(knowledge); } else { throw new IllegalStateException(); } } else if (dataModel instanceof CovarianceMatrix) { CovarianceMatrix covarianceMatrix = (CovarianceMatrix) dataModel; if (getAlgorithmType() == AlgorithmType.BEAM) { search = new BffBeam(graph2, covarianceMatrix, knowledge); } else if (getAlgorithmType() == AlgorithmType.GES) { throw new IllegalArgumentException( "GES method requires a dataset; a covariance matrix was provided."); // search = new BffGes(graph2, covarianceMatrix); // search.setKnowledge(knowledge); } else { throw new IllegalStateException(); } } else { throw new IllegalStateException(); } PcIndTestParams indTestParams = (PcIndTestParams) getParams().getIndTestParams(); search.setAlpha(indTestParams.getAlpha()); search.setBeamWidth(indTestParams.getBeamWidth()); search.setHighPValueAlpha(indTestParams.getZeroEdgeP()); this.graph = search.search(); // this.graph = search.getNewSemIm().getSemPm().getGraph(); setOriginalSemIm(search.getOriginalSemIm()); this.newSemIm = search.getNewSemIm(); fireGraphChange(graph); if (getSourceGraph() != null) { GraphUtils.arrangeBySourceGraph(graph, getSourceGraph()); } else if (knowledge.isDefaultToKnowledgeLayout()) { 
SearchGraphUtils.arrangeByKnowledgeTiers(graph, knowledge); } else { GraphUtils.circleLayout(graph, 200, 200, 150); } setResultGraph(SearchGraphUtils.patternForDag(graph, knowledge)); }