/**
 * Builds a delta sextad test backed by a tabular data set. Having the raw data available is what
 * allows fourth moment statistics to be calculated (p. 160); otherwise it would have to be
 * assumed that the data are multivariate Gaussian.
 *
 * @param dataSet the continuous data set to test.
 * @throws NullPointerException if {@code dataSet} is null.
 * @throws IllegalArgumentException if {@code dataSet} is not continuous.
 */
public DeltaSextadTest(DataSet dataSet) {
  if (dataSet == null) {
    throw new NullPointerException();
  }

  if (!dataSet.isContinuous()) {
    throw new IllegalArgumentException();
  }

  // The covariance matrix is built from the data set as passed in, before any centering.
  this.cov = new CovarianceMatrix(dataSet);

  // DataUtils.center operates on a list of data sets, so wrap the single set and unwrap the result.
  List<DataSet> uncentered = new ArrayList<DataSet>();
  uncentered.add(dataSet);
  List<DataSet> centered = DataUtils.center(uncentered);
  this.dataSet = centered.get(0);

  // Transposed so that data[v] can be handed to mean(...) one variable at a time (see below).
  this.data = this.dataSet.getDoubleData().transpose().toArray();

  // Dimensions and variables are read from the data set that was passed in.
  this.N = dataSet.getNumRows();
  this.variables = dataSet.getVariables();
  this.numVars = dataSet.getNumColumns();

  // Position of each variable in the variable list, for lookup by node.
  this.variablesHash = new HashMap<Node, Integer>();
  int position = 0;
  for (Node variable : variables) {
    variablesHash.put(variable, position);
    position++;
  }

  // One mean per variable, computed over the centered data rows.
  this.means = new double[numVars];
  for (int v = 0; v < numVars; v++) {
    means[v] = mean(data[v], N);
  }
}
/**
 * Constructs a new Fisher Z independence test from a raw data matrix and its variables.
 *
 * <p>The matrix and variable list are first wrapped into a continuous data set; the correlation
 * matrix, variable list, significance level and the generalized-inverse fallback test are then
 * initialised from that data set.
 *
 * @param data A 2D continuous data set with no missing values.
 * @param variables A list of variables, a subset of the variables of <code>data</code>.
 * @param alpha The significance cutoff level. p values less than alpha will be reported as
 *     dependent.
 */
public IndTestFisherZShortTriangular(TetradMatrix data, List<Node> variables, double alpha) {
  DataSet dataSet = ColtDataSet.makeContinuousData(variables, data);

  this.covMatrix = new ShortTriangularMatrix(dataSet.getNumColumns());
  this.covMatrix.becomeCorrelationMatrix(dataSet);
  this.variables = dataSet.getVariables();
  setAlpha(alpha);

  this.deterministicTest = new IndTestFisherZGeneralizedInverse(dataSet, alpha);

  // Retain the wrapped data set, exactly as the DataSet-based constructor of this class does;
  // previously this constructor left the field unset, so the two constructors produced
  // differently-initialised instances.
  this.dataSet = dataSet;
}
@Test public void test2() { RandomUtil.getInstance().setSeed(2999983L); int sampleSize = 1000; List<Node> variableNodes = new ArrayList<>(); ContinuousVariable x1 = new ContinuousVariable("X1"); ContinuousVariable x2 = new ContinuousVariable("X2"); ContinuousVariable x3 = new ContinuousVariable("X3"); ContinuousVariable x4 = new ContinuousVariable("X4"); ContinuousVariable x5 = new ContinuousVariable("X5"); variableNodes.add(x1); variableNodes.add(x2); variableNodes.add(x3); variableNodes.add(x4); variableNodes.add(x5); Graph _graph = new EdgeListGraph(variableNodes); SemGraph graph = new SemGraph(_graph); graph.addDirectedEdge(x1, x3); graph.addDirectedEdge(x2, x3); graph.addDirectedEdge(x3, x4); graph.addDirectedEdge(x2, x4); graph.addDirectedEdge(x4, x5); graph.addDirectedEdge(x2, x5); SemPm semPm = new SemPm(graph); SemIm semIm = new SemIm(semPm); DataSet dataSet = semIm.simulateData(sampleSize, false); print(semPm); GeneralizedSemPm _semPm = new GeneralizedSemPm(semPm); GeneralizedSemIm _semIm = new GeneralizedSemIm(_semPm, semIm); DataSet _dataSet = _semIm.simulateDataMinimizeSurface(sampleSize, false); print(_semPm); // System.out.println(_dataSet); for (int j = 0; j < dataSet.getNumColumns(); j++) { double[] col = dataSet.getDoubleData().getColumn(j).toArray(); double[] _col = _dataSet.getDoubleData().getColumn(j).toArray(); double mean = StatUtils.mean(col); double _mean = StatUtils.mean(_col); double variance = StatUtils.variance(col); double _variance = StatUtils.variance(_col); assertEquals(mean, _mean, 0.3); assertEquals(1.0, variance / _variance, .2); } }
/**
 * Creates an independence test whose independence judgements are based on the correlation
 * matrix derived from the supplied continuous data set, at the supplied significance level.
 *
 * @param dataSet A data set containing only continuous columns.
 * @param alpha The alpha level of the test.
 * @throws IllegalArgumentException if the data set is not continuous.
 */
public IndTestFisherZShortTriangular(DataSet dataSet, double alpha) {
  boolean continuous = dataSet.isContinuous();
  if (!continuous) {
    throw new IllegalArgumentException("Data set must be continuous.");
  }

  // Allocate the triangular matrix for the number of columns, then fill it from the data.
  int numColumns = dataSet.getNumColumns();
  this.covMatrix = new ShortTriangularMatrix(numColumns);
  this.covMatrix.becomeCorrelationMatrix(dataSet);

  this.variables = dataSet.getVariables();
  setAlpha(alpha);

  // Companion test constructed over the same data set and alpha.
  this.deterministicTest = new IndTestFisherZGeneralizedInverse(dataSet, alpha);
  this.dataSet = dataSet;
}
/** Creates a cell count table for the given data set. */ public DataSetProbs(DataSet dataSet) { if (dataSet == null) { throw new NullPointerException(); } this.dataSet = dataSet; dims = new int[dataSet.getNumColumns()]; for (int i = 0; i < dims.length; i++) { DiscreteVariable variable = (DiscreteVariable) dataSet.getVariable(i); dims[i] = variable.getNumCategories(); } numRows = dataSet.getNumRows(); }
public void actionPerformed(ActionEvent e) { DataSet dataSet = (DataSet) dataEditor.getSelectedDataModel(); if (dataSet == null || dataSet.getNumColumns() == 0) { JOptionPane.showMessageDialog( findOwner(), "Cannot display a scatter plot for an empty data set."); return; } JPanel panel = new ScatterPlotView(dataSet); EditorWindow editorWindow = new EditorWindow(panel, "Scatter Plots", "Save", true, dataEditor); // JPanel dialog = createScatterPlotDialog(null, null); // EditorWindow editorWindow = new EditorWindow(dialog, "Scatter Plots", "Save", true, // dataEditor); DesktopController.getInstance().addEditorWindow(editorWindow, JLayeredPane.PALETTE_LAYER); editorWindow.pack(); editorWindow.setVisible(true); }
/**
 * Partitions the cases of a data set into consecutive groups and copies each group into its own
 * data set.
 *
 * <p>Row indices are optionally shuffled first. The split boundaries are 0, the spec's
 * breakpoints, and the spec's sample size, in that order; split {@code n} receives the
 * (possibly shuffled) rows between boundary {@code n} and boundary {@code n + 1}.
 *
 * @param dataSet the data set whose rows are distributed over the splits.
 * @param params supplies the shuffle flag, the number of splits and the split spec (sample
 *     size, breakpoints and split names).
 * @return a {@code DataModelList} with one data set per split, each named after the
 *     corresponding split name.
 */
public static DataModel createSplits(DataSet dataSet, SplitCasesParams params) {
  int numRows = dataSet.getNumRows();

  // Row order: identity, or a random permutation when shuffling is requested.
  List<Integer> rowOrder = new ArrayList<Integer>(numRows);
  for (int row = 0; row < numRows; row++) {
    rowOrder.add(row);
  }

  if (params.isDataShuffled()) {
    Collections.shuffle(rowOrder);
  }

  SplitCasesSpec spec = params.getSpec();
  int numSplits = params.getNumSplits();
  int sampleSize = spec.getSampleSize();
  int[] breakpoints = spec.getBreakpoints();
  List<String> splitNames = spec.getSplitNames();

  // Boundaries: [0, breakpoints..., sampleSize].
  int[] bounds = new int[breakpoints.length + 2];
  bounds[0] = 0;
  bounds[bounds.length - 1] = sampleSize;
  for (int k = 0; k < breakpoints.length; k++) {
    bounds[k + 1] = breakpoints[k];
  }

  DataModelList splits = new DataModelList();
  int numColumns = dataSet.getNumColumns();

  for (int n = 0; n < numSplits; n++) {
    int start = bounds[n];
    int splitSize = bounds[n + 1] - start;

    DataSet split = new ColtDataSet(splitSize, dataSet.getVariables());
    split.setName(splitNames.get(n));

    // Copy every cell of each selected source row into the split.
    for (int row = 0; row < splitSize; row++) {
      int sourceRow = rowOrder.get(start + row);

      for (int col = 0; col < numColumns; col++) {
        split.setObject(row, col, dataSet.getObject(sourceRow, col));
      }
    }

    splits.add(split);
  }

  return splits;
}
/**
 * Builds a short textual report of fit statistics for a graph against a discrete data set.
 *
 * <p>The report lists the likelihood ratio p-value, its degrees of freedom, the chi square
 * statistic and the BIC score, all obtained from a {@code BayesProperties} built over the data
 * set and graph, followed by a line stating the null hypothesis.
 *
 * @param dag the graph to report on; it is also wrapped in a {@code Dag} to build a Bayes PM.
 * @param dataSet the data set; every one of its variables is cast to {@code DiscreteVariable}.
 * @return the multi-line report text.
 * @throws ClassCastException if any variable of {@code dataSet} is not a
 *     {@code DiscreteVariable}.
 */
private String reportIfDiscrete(Graph dag, DataSet dataSet) {
  List<Node> vars = dataSet.getVariables();

  // Index the data set's variables by name so graph nodes can be matched to them.
  Map<String, DiscreteVariable> varsByName = new HashMap<String, DiscreteVariable>();

  for (int i = 0; i < dataSet.getNumColumns(); i++) {
    DiscreteVariable var = (DiscreteVariable) vars.get(i);
    varsByName.put(var.getName(), var);
  }

  // NOTE(review): bayesPm is not read again in this method after its categories are set. It is
  // kept because constructing the Dag / BayesPm may validate the graph -- confirm before removing.
  BayesPm bayesPm = new BayesPm(new Dag(dag));

  for (Node node : bayesPm.getDag().getNodes()) {
    DiscreteVariable var = varsByName.get(node.getName());

    // Nodes with no same-named variable in the data set keep their default categories.
    if (var == null) {
      continue;
    }

    int numCategories = var.getNumCategories();
    List<String> categories = new ArrayList<String>(numCategories);

    for (int j = 0; j < numCategories; j++) {
      categories.add(var.getCategory(j));
    }

    bayesPm.setCategories(node, categories);
  }

  BayesProperties properties = new BayesProperties(dataSet, dag);
  properties.setGraph(dag);

  NumberFormat nf = NumberFormat.getInstance();
  nf.setMaximumFractionDigits(4);

  // The p-value and Df are appended unformatted; chi square and BIC go through nf.
  StringBuilder buf = new StringBuilder();
  buf.append("\nP-value = ").append(properties.getLikelihoodRatioP());
  buf.append("\nDf = ").append(properties.getPValueDf());
  buf.append("\nChi square = ").append(nf.format(properties.getPValueChisq()));
  buf.append("\nBIC score = ").append(nf.format(properties.getBic()));
  buf.append("\n\nH0: Completely disconnected graph.");

  return buf.toString();
}
public void actionPerformed(ActionEvent e) { DataSet dataSet = (DataSet) dataEditor.getSelectedDataModel(); if (dataSet == null || dataSet.getNumColumns() == 0) { JOptionPane.showMessageDialog( findOwner(), "Cannot run normality tests on an empty data set."); return; } // if there are missing values warn and don't display q-q plot. // if(DataUtils.containsMissingValue(dataSet)){ // JOptionPane.showMessageDialog(findOwner(), new JLabel("<html>Data has missing // values, " + // "remove all missing values before<br>" + // "running normality tests.</html>")); // return; // } JPanel panel = createNormalityTestDialog(null); EditorWindow window = new EditorWindow(panel, "Normality Tests", "Close", false, dataEditor); DesktopController.getInstance().addEditorWindow(window, JLayeredPane.PALETTE_LAYER); window.setVisible(true); }
public final DataSet filter(DataSet dataSet) { // Why does it have to be discrete? Why can't we simply expand // whatever discrete columns are there and leave the continuous // ones untouched? jdramsey 7/4/2005 // if (!(dataSet.isDiscrete())) { // throw new IllegalArgumentException("Data set must be discrete."); // } List<Node> variables = new LinkedList<>(); // Add all of the variables to the new data set. for (int j = 0; j < dataSet.getNumColumns(); j++) { Node _var = dataSet.getVariable(j); if (!(_var instanceof DiscreteVariable)) { variables.add(_var); continue; } DiscreteVariable variable = (DiscreteVariable) _var; String oldName = variable.getName(); List<String> oldCategories = variable.getCategories(); List<String> newCategories = new LinkedList<>(oldCategories); String newCategory = "Missing"; int _j = 0; while (oldCategories.contains(newCategory)) { newCategory = "Missing" + (++_j); } newCategories.add(newCategory); String newName = oldName + "+"; DiscreteVariable newVariable = new DiscreteVariable(newName, newCategories); variables.add(newVariable); } DataSet newDataSet = new ColtDataSet(dataSet.getNumRows(), variables); // Copy old values to new data set, replacing missing values with new // "MissingValue" categories. for (int j = 0; j < dataSet.getNumColumns(); j++) { Node _var = dataSet.getVariable(j); if (_var instanceof ContinuousVariable) { for (int i = 0; i < dataSet.getNumRows(); i++) { newDataSet.setDouble(i, j, dataSet.getDouble(i, j)); } } else if (_var instanceof DiscreteVariable) { DiscreteVariable variable = (DiscreteVariable) _var; int numCategories = variable.getNumCategories(); for (int i = 0; i < dataSet.getNumRows(); i++) { int value = dataSet.getInt(i, j); if (value == DiscreteVariable.MISSING_VALUE) { newDataSet.setInt(i, j, numCategories); } else { newDataSet.setInt(i, j, value); } } } } return newDataSet; }