예제 #1
0
  /**
   * Constructs a test using a given data set. If a data set is provided (that is, a tabular data
   * set), fourth moment statistics can be calculated (p. 160); otherwise, it must be assumed that
   * the data are multivariate Gaussian.
   */
  public DeltaSextadTest(DataSet dataSet) {
    if (dataSet == null) {
      throw new NullPointerException();
    }

    if (!dataSet.isContinuous()) {
      throw new IllegalArgumentException();
    }

    this.cov = new CovarianceMatrix(dataSet);

    List<DataSet> data1 = new ArrayList<DataSet>();
    data1.add(dataSet);
    List<DataSet> data2 = DataUtils.center(data1);

    this.dataSet = data2.get(0);

    this.data = this.dataSet.getDoubleData().transpose().toArray();
    this.N = dataSet.getNumRows();
    this.variables = dataSet.getVariables();
    this.numVars = dataSet.getNumColumns();

    this.variablesHash = new HashMap<Node, Integer>();

    for (int i = 0; i < variables.size(); i++) {
      variablesHash.put(variables.get(i), i);
    }

    this.means = new double[numVars];

    for (int i = 0; i < numVars; i++) {
      means[i] = mean(data[i], N);
    }
  }
  /**
   * Constructs a new Fisher Z independence test with the listed arguments.
   *
   * @param data A 2D continuous data set with no missing values.
   * @param variables A list of variables, a subset of the variables of <code>data</code>.
   * @param alpha The significance cutoff level. p values less than alpha will be reported as
   *     dependent.
   */
  public IndTestFisherZShortTriangular(TetradMatrix data, List<Node> variables, double alpha) {
    DataSet dataSet = ColtDataSet.makeContinuousData(variables, data);
    this.covMatrix = new ShortTriangularMatrix(dataSet.getNumColumns());
    this.covMatrix.becomeCorrelationMatrix(dataSet);
    this.variables = dataSet.getVariables();
    setAlpha(alpha);

    this.deterministicTest = new IndTestFisherZGeneralizedInverse(dataSet, alpha);
  }
예제 #3
0
  @Test
  public void test2() {
    RandomUtil.getInstance().setSeed(2999983L);

    int sampleSize = 1000;

    List<Node> variableNodes = new ArrayList<>();
    ContinuousVariable x1 = new ContinuousVariable("X1");
    ContinuousVariable x2 = new ContinuousVariable("X2");
    ContinuousVariable x3 = new ContinuousVariable("X3");
    ContinuousVariable x4 = new ContinuousVariable("X4");
    ContinuousVariable x5 = new ContinuousVariable("X5");

    variableNodes.add(x1);
    variableNodes.add(x2);
    variableNodes.add(x3);
    variableNodes.add(x4);
    variableNodes.add(x5);

    Graph _graph = new EdgeListGraph(variableNodes);
    SemGraph graph = new SemGraph(_graph);
    graph.addDirectedEdge(x1, x3);
    graph.addDirectedEdge(x2, x3);
    graph.addDirectedEdge(x3, x4);
    graph.addDirectedEdge(x2, x4);
    graph.addDirectedEdge(x4, x5);
    graph.addDirectedEdge(x2, x5);

    SemPm semPm = new SemPm(graph);
    SemIm semIm = new SemIm(semPm);
    DataSet dataSet = semIm.simulateData(sampleSize, false);

    print(semPm);

    GeneralizedSemPm _semPm = new GeneralizedSemPm(semPm);
    GeneralizedSemIm _semIm = new GeneralizedSemIm(_semPm, semIm);
    DataSet _dataSet = _semIm.simulateDataMinimizeSurface(sampleSize, false);

    print(_semPm);

    //        System.out.println(_dataSet);

    for (int j = 0; j < dataSet.getNumColumns(); j++) {
      double[] col = dataSet.getDoubleData().getColumn(j).toArray();
      double[] _col = _dataSet.getDoubleData().getColumn(j).toArray();

      double mean = StatUtils.mean(col);
      double _mean = StatUtils.mean(_col);

      double variance = StatUtils.variance(col);
      double _variance = StatUtils.variance(_col);

      assertEquals(mean, _mean, 0.3);
      assertEquals(1.0, variance / _variance, .2);
    }
  }
  /**
   * Constructs a new Independence test which checks independence facts based on the correlation
   * matrix implied by the given data set (must be continuous). The given significance level is
   * used.
   *
   * @param dataSet A data set containing only continuous columns.
   * @param alpha The alpha level of the test.
   */
  public IndTestFisherZShortTriangular(DataSet dataSet, double alpha) {
    if (!(dataSet.isContinuous())) {
      throw new IllegalArgumentException("Data set must be continuous.");
    }

    this.covMatrix = new ShortTriangularMatrix(dataSet.getNumColumns());
    this.covMatrix.becomeCorrelationMatrix(dataSet);
    this.variables = dataSet.getVariables();
    setAlpha(alpha);

    this.deterministicTest = new IndTestFisherZGeneralizedInverse(dataSet, alpha);
    this.dataSet = dataSet;
  }
예제 #5
0
  /** Creates a cell count table for the given data set. */
  public DataSetProbs(DataSet dataSet) {
    if (dataSet == null) {
      throw new NullPointerException();
    }

    this.dataSet = dataSet;
    dims = new int[dataSet.getNumColumns()];

    for (int i = 0; i < dims.length; i++) {
      DiscreteVariable variable = (DiscreteVariable) dataSet.getVariable(i);
      dims[i] = variable.getNumCategories();
    }

    numRows = dataSet.getNumRows();
  }
예제 #6
0
  public void actionPerformed(ActionEvent e) {
    DataSet dataSet = (DataSet) dataEditor.getSelectedDataModel();
    if (dataSet == null || dataSet.getNumColumns() == 0) {
      JOptionPane.showMessageDialog(
          findOwner(), "Cannot display a scatter plot for an empty data set.");
      return;
    }

    JPanel panel = new ScatterPlotView(dataSet);
    EditorWindow editorWindow = new EditorWindow(panel, "Scatter Plots", "Save", true, dataEditor);

    //        JPanel dialog = createScatterPlotDialog(null, null);
    //        EditorWindow editorWindow = new EditorWindow(dialog, "Scatter Plots", "Save", true,
    // dataEditor);

    DesktopController.getInstance().addEditorWindow(editorWindow, JLayeredPane.PALETTE_LAYER);
    editorWindow.pack();
    editorWindow.setVisible(true);
  }
예제 #7
0
  /** @return the splitNames selected by the editor. */
  public static DataModel createSplits(DataSet dataSet, SplitCasesParams params) {
    List<Integer> indices = new ArrayList<Integer>(dataSet.getNumRows());
    for (int i = 0; i < dataSet.getNumRows(); i++) {
      indices.add(i);
    }

    if (params.isDataShuffled()) {
      Collections.shuffle(indices);
    }

    SplitCasesSpec spec = params.getSpec();
    int numSplits = params.getNumSplits();
    int sampleSize = spec.getSampleSize();
    int[] breakpoints = spec.getBreakpoints();
    List<String> splitNames = spec.getSplitNames();

    int[] _breakpoints = new int[breakpoints.length + 2];
    _breakpoints[0] = 0;
    _breakpoints[_breakpoints.length - 1] = sampleSize;
    System.arraycopy(breakpoints, 0, _breakpoints, 1, breakpoints.length);

    DataModelList list = new DataModelList();
    int ncols = dataSet.getNumColumns();
    for (int n = 0; n < numSplits; n++) {
      int _sampleSize = _breakpoints[n + 1] - _breakpoints[n];

      DataSet _data = new ColtDataSet(_sampleSize, dataSet.getVariables());
      _data.setName(splitNames.get(n));

      for (int i = 0; i < _sampleSize; i++) {
        int oldCase = indices.get(i + _breakpoints[n]);

        for (int j = 0; j < ncols; j++) {
          _data.setObject(i, j, dataSet.getObject(oldCase, j));
        }
      }

      list.add(_data);
    }

    return list;
  }
예제 #8
0
  private String reportIfDiscrete(Graph dag, DataSet dataSet) {
    List vars = dataSet.getVariables();
    Map<String, DiscreteVariable> nodesToVars = new HashMap<String, DiscreteVariable>();
    for (int i = 0; i < dataSet.getNumColumns(); i++) {
      DiscreteVariable var = (DiscreteVariable) vars.get(i);
      String name = var.getName();
      Node node = new GraphNode(name);
      nodesToVars.put(node.getName(), var);
    }

    BayesPm bayesPm = new BayesPm(new Dag(dag));
    List<Node> nodes = bayesPm.getDag().getNodes();

    for (Node node : nodes) {
      Node var = nodesToVars.get(node.getName());

      if (var instanceof DiscreteVariable) {
        DiscreteVariable var2 = nodesToVars.get(node.getName());
        int numCategories = var2.getNumCategories();
        List<String> categories = new ArrayList<String>();
        for (int j = 0; j < numCategories; j++) {
          categories.add(var2.getCategory(j));
        }
        bayesPm.setCategories(node, categories);
      }
    }

    BayesProperties properties = new BayesProperties(dataSet, dag);
    properties.setGraph(dag);

    NumberFormat nf = NumberFormat.getInstance();
    nf.setMaximumFractionDigits(4);

    StringBuilder buf = new StringBuilder();
    buf.append("\nP-value = ").append(properties.getLikelihoodRatioP());
    buf.append("\nDf = ").append(properties.getPValueDf());
    buf.append("\nChi square = ").append(nf.format(properties.getPValueChisq()));
    buf.append("\nBIC score = ").append(nf.format(properties.getBic()));
    buf.append("\n\nH0: Completely disconnected graph.");

    return buf.toString();
  }
예제 #9
0
  public void actionPerformed(ActionEvent e) {
    DataSet dataSet = (DataSet) dataEditor.getSelectedDataModel();
    if (dataSet == null || dataSet.getNumColumns() == 0) {
      JOptionPane.showMessageDialog(
          findOwner(), "Cannot run normality tests on an empty data set.");
      return;
    }
    // if there are missing values warn and don't display q-q plot.
    //        if(DataUtils.containsMissingValue(dataSet)){
    //            JOptionPane.showMessageDialog(findOwner(), new JLabel("<html>Data has missing
    // values, " +
    //                    "remove all missing values before<br>" +
    //                    "running normality tests.</html>"));
    //            return;
    //        }

    JPanel panel = createNormalityTestDialog(null);

    EditorWindow window = new EditorWindow(panel, "Normality Tests", "Close", false, dataEditor);
    DesktopController.getInstance().addEditorWindow(window, JLayeredPane.PALETTE_LAYER);
    window.setVisible(true);
  }
예제 #10
0
  public final DataSet filter(DataSet dataSet) {

    // Why does it have to be discrete? Why can't we simply expand
    // whatever discrete columns are there and leave the continuous
    // ones untouched? jdramsey 7/4/2005
    //        if (!(dataSet.isDiscrete())) {
    //            throw new IllegalArgumentException("Data set must be discrete.");
    //        }

    List<Node> variables = new LinkedList<>();

    // Add all of the variables to the new data set.
    for (int j = 0; j < dataSet.getNumColumns(); j++) {
      Node _var = dataSet.getVariable(j);

      if (!(_var instanceof DiscreteVariable)) {
        variables.add(_var);
        continue;
      }

      DiscreteVariable variable = (DiscreteVariable) _var;

      String oldName = variable.getName();
      List<String> oldCategories = variable.getCategories();
      List<String> newCategories = new LinkedList<>(oldCategories);

      String newCategory = "Missing";
      int _j = 0;

      while (oldCategories.contains(newCategory)) {
        newCategory = "Missing" + (++_j);
      }

      newCategories.add(newCategory);
      String newName = oldName + "+";
      DiscreteVariable newVariable = new DiscreteVariable(newName, newCategories);

      variables.add(newVariable);
    }

    DataSet newDataSet = new ColtDataSet(dataSet.getNumRows(), variables);

    // Copy old values to new data set, replacing missing values with new
    // "MissingValue" categories.
    for (int j = 0; j < dataSet.getNumColumns(); j++) {
      Node _var = dataSet.getVariable(j);

      if (_var instanceof ContinuousVariable) {
        for (int i = 0; i < dataSet.getNumRows(); i++) {
          newDataSet.setDouble(i, j, dataSet.getDouble(i, j));
        }
      } else if (_var instanceof DiscreteVariable) {
        DiscreteVariable variable = (DiscreteVariable) _var;
        int numCategories = variable.getNumCategories();

        for (int i = 0; i < dataSet.getNumRows(); i++) {
          int value = dataSet.getInt(i, j);

          if (value == DiscreteVariable.MISSING_VALUE) {
            newDataSet.setInt(i, j, numCategories);
          } else {
            newDataSet.setInt(i, j, value);
          }
        }
      }
    }

    return newDataSet;
  }