private List<Node> expandVariable(DataSet dataSet, Node node) {
    if (node instanceof ContinuousVariable) {
      return Collections.singletonList(node);
    }

    if (node instanceof DiscreteVariable && ((DiscreteVariable) node).getNumCategories() < 3) {
      return Collections.singletonList(node);
    }

    if (!(node instanceof DiscreteVariable)) {
      throw new IllegalArgumentException();
    }

    List<String> varCats = new ArrayList<String>(((DiscreteVariable) node).getCategories());

    // first category is reference
    varCats.remove(0);
    List<Node> variables = new ArrayList<Node>();

    for (String cat : varCats) {

      Node newVar;

      do {
        String newVarName = node.getName() + "MULTINOM" + "." + cat;
        newVar = new DiscreteVariable(newVarName, 2);
      } while (dataSet.getVariable(newVar.getName()) != null);

      variables.add(newVar);

      dataSet.addVariable(newVar);
      int newVarIndex = dataSet.getColumn(newVar);
      int numCases = dataSet.getNumRows();

      for (int l = 0; l < numCases; l++) {
        Object dataCell = dataSet.getObject(l, dataSet.getColumn(node));
        int dataCellIndex = ((DiscreteVariable) node).getIndex(dataCell.toString());

        if (dataCellIndex == ((DiscreteVariable) node).getIndex(cat))
          dataSet.setInt(l, newVarIndex, 1);
        else dataSet.setInt(l, newVarIndex, 0);
      }
    }

    return variables;
  }
  private void initialize() {
    DirichletBayesIm prior = DirichletBayesIm.symmetricDirichletIm(bayesPmObs, 0.5);
    observedIm = DirichletEstimator.estimate(prior, dataSet);

    //        MLBayesEstimator dirichEst = new MLBayesEstimator();
    //        observedIm = dirichEst.estimate(bayesPmObs, dataSet);

    //        System.out.println("Estimated Bayes IM for Measured Variables:  ");
    //        System.out.println(observedIm);

    // mixedData should be ddsNm with new columns for the latent variables.
    // Each such column should contain missing data for each case.

    int numFullCases = dataSet.getNumRows();
    List<Node> variables = new LinkedList<Node>();

    for (Node node : nodes) {
      if (node.getNodeType() == NodeType.LATENT) {
        int numCategories = bayesPm.getNumCategories(node);
        DiscreteVariable latentVar = new DiscreteVariable(node.getName(), numCategories);
        variables.add(latentVar);
      } else {
        String name = bayesPm.getVariable(node).getName();
        Node variable = dataSet.getVariable(name);
        variables.add(variable);
      }
    }

    DataSet dsMixed = new ColtDataSet(numFullCases, variables);

    for (int j = 0; j < nodes.length; j++) {
      if (nodes[j].getNodeType() == NodeType.LATENT) {
        for (int i = 0; i < numFullCases; i++) {
          dsMixed.setInt(i, j, -99);
        }
      } else {
        String name = bayesPm.getVariable(nodes[j]).getName();
        Node variable = dataSet.getVariable(name);
        int index = dataSet.getColumn(variable);

        for (int i = 0; i < numFullCases; i++) {
          dsMixed.setInt(i, j, dataSet.getInt(i, index));
        }
      }
    }

    //        System.out.println(dsMixed);

    mixedData = dsMixed;
    allVariables = mixedData.getVariables();

    // Find the bayes net which is parameterized using mixedData or set randomly when that's
    // not possible.
    estimateIM(bayesPm, mixedData);

    // The following DEBUG section tests a case specified by P. Spirtes
    // DEBUG TAIL:   For use with embayes_l1x1x2x3V3.dat
    /*
    Node l1Node = graph.getNode("L1");
    //int l1Index = bayesImMixed.getNodeIndex(l1Node);
    int l1index = estimatedIm.getNodeIndex(l1Node);
    Node x1Node = graph.getNode("X1");
    //int x1Index = bayesImMixed.getNodeIndex(x1Node);
    int x1Index = estimatedIm.getNodeIndex(x1Node);
    Node x2Node = graph.getNode("X2");
    //int x2Index = bayesImMixed.getNodeIndex(x2Node);
    int x2Index = estimatedIm.getNodeIndex(x2Node);
    Node x3Node = graph.getNode("X3");
    //int x3Index = bayesImMixed.getNodeIndex(x3Node);
    int x3Index = estimatedIm.getNodeIndex(x3Node);

    estimatedIm.setProbability(l1index, 0, 0, 0.5);
    estimatedIm.setProbability(l1index, 0, 1, 0.5);

    //bayesImMixed.setProbability(x1Index, 0, 0, 0.33333);
    //bayesImMixed.setProbability(x1Index, 0, 1, 0.66667);
    estimatedIm.setProbability(x1Index, 0, 0, 0.6);      //p(x1 = 0 | l1 = 0)
    estimatedIm.setProbability(x1Index, 0, 1, 0.4);      //p(x1 = 1 | l1 = 0)
    estimatedIm.setProbability(x1Index, 1, 0, 0.4);      //p(x1 = 0 | l1 = 1)
    estimatedIm.setProbability(x1Index, 1, 1, 0.6);      //p(x1 = 1 | l1 = 1)

    //bayesImMixed.setProbability(x2Index, 1, 0, 0.66667);
    //bayesImMixed.setProbability(x2Index, 1, 1, 0.33333);
    estimatedIm.setProbability(x2Index, 1, 0, 0.4);      //p(x2 = 0 | l1 = 1)
    estimatedIm.setProbability(x2Index, 1, 1, 0.6);      //p(x2 = 1 | l1 = 1)
    estimatedIm.setProbability(x2Index, 0, 0, 0.6);      //p(x2 = 0 | l1 = 0)
    estimatedIm.setProbability(x2Index, 0, 1, 0.4);      //p(x2 = 1 | l1 = 0)

    //bayesImMixed.setProbability(x3Index, 1, 0, 0.66667);
    //bayesImMixed.setProbability(x3Index, 1, 1, 0.33333);
    estimatedIm.setProbability(x3Index, 1, 0, 0.4);      //p(x3 = 0 | l1 = 1)
    estimatedIm.setProbability(x3Index, 1, 1, 0.6);      //p(x3 = 1 | l1 = 1)
    estimatedIm.setProbability(x3Index, 0, 0, 0.6);      //p(x3 = 0 | l1 = 0)
    estimatedIm.setProbability(x3Index, 0, 1, 0.4);      //p(x3 = 1 | l1 = 0)
    */
    // END of TAIL

    // System.out.println("bayes IM estimated by estimateIM");
    // System.out.println(bayesImMixed);
    // System.out.println(estimatedIm);

    estimatedCounts = new double[nodes.length][][];
    estimatedCountsDenom = new double[nodes.length][];
    condProbs = new double[nodes.length][][];

    for (int i = 0; i < nodes.length; i++) {
      // int numRows = bayesImMixed.getNumRows(i);
      int numRows = estimatedIm.getNumRows(i);
      estimatedCounts[i] = new double[numRows][];
      estimatedCountsDenom[i] = new double[numRows];
      condProbs[i] = new double[numRows][];
      // for(int j = 0; j < bayesImMixed.getNumRows(i); j++) {
      for (int j = 0; j < estimatedIm.getNumRows(i); j++) {
        // int numCols = bayesImMixed.getNumColumns(i);
        int numCols = estimatedIm.getNumColumns(i);
        estimatedCounts[i][j] = new double[numCols];
        condProbs[i][j] = new double[numCols];
      }
    }
  }
  private void createDiscreteTimeSeriesData() {

    // GIVEN: Continuous data set D, maximum lag m.
    Node[] dataVars = dataSet.getVariables().toArray(new Node[0]);
    int n = dataVars.length;
    int m = getNumLags();

    // LetXi, i = 0,...,n-1, be the variables from the data. Let Xi(t) be
    // the variable Xi at time lag t (before 0), t = 0,...,m.
    Node[][] laggedVars = new Node[m + 1][n];
    Knowledge knowledge = new Knowledge();

    for (int s = 0; s <= m; s++) {
      for (int j = 0; j < n; j++) {
        String name1 = dataVars[j].getName();
        String name2 = name1 + "." + (s + 1);
        laggedVars[s][j] = new DiscreteVariable((DiscreteVariable) dataVars[j]);
        laggedVars[s][j].setName(name2);
        laggedVars[s][j].setCenter(80 * j + 50, 80 * (m - s) + 50);
        knowledge.addToTier(s, laggedVars[s][j].getName());
      }
    }

    // 2. Prepare the data the way you did.
    List<Node> variables = new LinkedList<Node>();

    for (int s = 0; s <= m; s++) {
      for (int i = 0; i < n; i++) {
        int[] rawData = new int[dataSet.getNumRows()];

        for (int j = 0; j < dataSet.getNumRows(); j++) {
          rawData[j] = dataSet.getInt(j, i);
        }

        int size = dataSet.getNumRows();

        int[] laggedRaw = new int[size - m + 1];
        System.arraycopy(rawData, m - s, laggedRaw, 0, size - m + 1);
        variables.add(laggedVars[s][i]);
      }
    }

    DataSet _laggedData = new ColtDataSet(dataSet.getNumRows() - m + 1, variables);

    for (int s = 0; s <= m; s++) {
      for (int i = 0; i < n; i++) {
        int[] rawData = new int[dataSet.getNumRows()];

        for (int j = 0; j < dataSet.getNumRows(); j++) {
          rawData[j] = dataSet.getInt(j, i);
        }

        int size = dataSet.getNumRows();

        int[] laggedRaw = new int[size - m + 1];
        System.arraycopy(rawData, m - s, laggedRaw, 0, size - m + 1);
        int _col = _laggedData.getColumn(laggedVars[s][i]);

        for (int j = 0; j < dataSet.getNumRows(); j++) {
          _laggedData.setInt(j, _col, laggedRaw[j]);
        }
      }
    }

    knowledge.setDefaultToKnowledgeLayout(true);
    _laggedData.setKnowledge(knowledge);
    DataModelList list = new DataModelList();
    list.add(_laggedData);
    getDataEditor().reset(list);
    getDataEditor().selectLastTab();
  }
Esempio n. 4
0
  public final DataSet filter(DataSet dataSet) {

    // Why does it have to be discrete? Why can't we simply expand
    // whatever discrete columns are there and leave the continuous
    // ones untouched? jdramsey 7/4/2005
    //        if (!(dataSet.isDiscrete())) {
    //            throw new IllegalArgumentException("Data set must be discrete.");
    //        }

    List<Node> variables = new LinkedList<>();

    // Add all of the variables to the new data set.
    for (int j = 0; j < dataSet.getNumColumns(); j++) {
      Node _var = dataSet.getVariable(j);

      if (!(_var instanceof DiscreteVariable)) {
        variables.add(_var);
        continue;
      }

      DiscreteVariable variable = (DiscreteVariable) _var;

      String oldName = variable.getName();
      List<String> oldCategories = variable.getCategories();
      List<String> newCategories = new LinkedList<>(oldCategories);

      String newCategory = "Missing";
      int _j = 0;

      while (oldCategories.contains(newCategory)) {
        newCategory = "Missing" + (++_j);
      }

      newCategories.add(newCategory);
      String newName = oldName + "+";
      DiscreteVariable newVariable = new DiscreteVariable(newName, newCategories);

      variables.add(newVariable);
    }

    DataSet newDataSet = new ColtDataSet(dataSet.getNumRows(), variables);

    // Copy old values to new data set, replacing missing values with new
    // "MissingValue" categories.
    for (int j = 0; j < dataSet.getNumColumns(); j++) {
      Node _var = dataSet.getVariable(j);

      if (_var instanceof ContinuousVariable) {
        for (int i = 0; i < dataSet.getNumRows(); i++) {
          newDataSet.setDouble(i, j, dataSet.getDouble(i, j));
        }
      } else if (_var instanceof DiscreteVariable) {
        DiscreteVariable variable = (DiscreteVariable) _var;
        int numCategories = variable.getNumCategories();

        for (int i = 0; i < dataSet.getNumRows(); i++) {
          int value = dataSet.getInt(i, j);

          if (value == DiscreteVariable.MISSING_VALUE) {
            newDataSet.setInt(i, j, numCategories);
          } else {
            newDataSet.setInt(i, j, value);
          }
        }
      }
    }

    return newDataSet;
  }