private List<Node> expandVariable(DataSet dataSet, Node node) { if (node instanceof ContinuousVariable) { return Collections.singletonList(node); } if (node instanceof DiscreteVariable && ((DiscreteVariable) node).getNumCategories() < 3) { return Collections.singletonList(node); } if (!(node instanceof DiscreteVariable)) { throw new IllegalArgumentException(); } List<String> varCats = new ArrayList<String>(((DiscreteVariable) node).getCategories()); // first category is reference varCats.remove(0); List<Node> variables = new ArrayList<Node>(); for (String cat : varCats) { Node newVar; do { String newVarName = node.getName() + "MULTINOM" + "." + cat; newVar = new DiscreteVariable(newVarName, 2); } while (dataSet.getVariable(newVar.getName()) != null); variables.add(newVar); dataSet.addVariable(newVar); int newVarIndex = dataSet.getColumn(newVar); int numCases = dataSet.getNumRows(); for (int l = 0; l < numCases; l++) { Object dataCell = dataSet.getObject(l, dataSet.getColumn(node)); int dataCellIndex = ((DiscreteVariable) node).getIndex(dataCell.toString()); if (dataCellIndex == ((DiscreteVariable) node).getIndex(cat)) dataSet.setInt(l, newVarIndex, 1); else dataSet.setInt(l, newVarIndex, 0); } } return variables; }
private void initialize() { DirichletBayesIm prior = DirichletBayesIm.symmetricDirichletIm(bayesPmObs, 0.5); observedIm = DirichletEstimator.estimate(prior, dataSet); // MLBayesEstimator dirichEst = new MLBayesEstimator(); // observedIm = dirichEst.estimate(bayesPmObs, dataSet); // System.out.println("Estimated Bayes IM for Measured Variables: "); // System.out.println(observedIm); // mixedData should be ddsNm with new columns for the latent variables. // Each such column should contain missing data for each case. int numFullCases = dataSet.getNumRows(); List<Node> variables = new LinkedList<Node>(); for (Node node : nodes) { if (node.getNodeType() == NodeType.LATENT) { int numCategories = bayesPm.getNumCategories(node); DiscreteVariable latentVar = new DiscreteVariable(node.getName(), numCategories); variables.add(latentVar); } else { String name = bayesPm.getVariable(node).getName(); Node variable = dataSet.getVariable(name); variables.add(variable); } } DataSet dsMixed = new ColtDataSet(numFullCases, variables); for (int j = 0; j < nodes.length; j++) { if (nodes[j].getNodeType() == NodeType.LATENT) { for (int i = 0; i < numFullCases; i++) { dsMixed.setInt(i, j, -99); } } else { String name = bayesPm.getVariable(nodes[j]).getName(); Node variable = dataSet.getVariable(name); int index = dataSet.getColumn(variable); for (int i = 0; i < numFullCases; i++) { dsMixed.setInt(i, j, dataSet.getInt(i, index)); } } } // System.out.println(dsMixed); mixedData = dsMixed; allVariables = mixedData.getVariables(); // Find the bayes net which is parameterized using mixedData or set randomly when that's // not possible. estimateIM(bayesPm, mixedData); // The following DEBUG section tests a case specified by P. Spirtes // DEBUG TAIL: For use with embayes_l1x1x2x3V3.dat /* Node l1Node = graph.getNode("L1"); //int l1Index = bayesImMixed.getNodeIndex(l1Node); int l1index = estimatedIm.getNodeIndex(l1Node); Node x1Node = graph.getNode("X1"); //int x1Index = bayesImMixed.getNodeIndex(x1Node); int x1Index = estimatedIm.getNodeIndex(x1Node); Node x2Node = graph.getNode("X2"); //int x2Index = bayesImMixed.getNodeIndex(x2Node); int x2Index = estimatedIm.getNodeIndex(x2Node); Node x3Node = graph.getNode("X3"); //int x3Index = bayesImMixed.getNodeIndex(x3Node); int x3Index = estimatedIm.getNodeIndex(x3Node); estimatedIm.setProbability(l1index, 0, 0, 0.5); estimatedIm.setProbability(l1index, 0, 1, 0.5); //bayesImMixed.setProbability(x1Index, 0, 0, 0.33333); //bayesImMixed.setProbability(x1Index, 0, 1, 0.66667); estimatedIm.setProbability(x1Index, 0, 0, 0.6); //p(x1 = 0 | l1 = 0) estimatedIm.setProbability(x1Index, 0, 1, 0.4); //p(x1 = 1 | l1 = 0) estimatedIm.setProbability(x1Index, 1, 0, 0.4); //p(x1 = 0 | l1 = 1) estimatedIm.setProbability(x1Index, 1, 1, 0.6); //p(x1 = 1 | l1 = 1) //bayesImMixed.setProbability(x2Index, 1, 0, 0.66667); //bayesImMixed.setProbability(x2Index, 1, 1, 0.33333); estimatedIm.setProbability(x2Index, 1, 0, 0.4); //p(x2 = 0 | l1 = 1) estimatedIm.setProbability(x2Index, 1, 1, 0.6); //p(x2 = 1 | l1 = 1) estimatedIm.setProbability(x2Index, 0, 0, 0.6); //p(x2 = 0 | l1 = 0) estimatedIm.setProbability(x2Index, 0, 1, 0.4); //p(x2 = 1 | l1 = 0) //bayesImMixed.setProbability(x3Index, 1, 0, 0.66667); //bayesImMixed.setProbability(x3Index, 1, 1, 0.33333); estimatedIm.setProbability(x3Index, 1, 0, 0.4); //p(x3 = 0 | l1 = 1) estimatedIm.setProbability(x3Index, 1, 1, 0.6); //p(x3 = 1 | l1 = 1) estimatedIm.setProbability(x3Index, 0, 0, 0.6); //p(x3 = 0 | l1 = 0) estimatedIm.setProbability(x3Index, 0, 1, 0.4); //p(x3 = 1 | l1 = 0) */ // END of TAIL // System.out.println("bayes IM estimated by estimateIM"); // System.out.println(bayesImMixed); // System.out.println(estimatedIm); estimatedCounts = new double[nodes.length][][]; estimatedCountsDenom = new double[nodes.length][]; condProbs = new double[nodes.length][][]; for (int i = 0; i < nodes.length; i++) { // int numRows = bayesImMixed.getNumRows(i); int numRows = estimatedIm.getNumRows(i); estimatedCounts[i] = new double[numRows][]; estimatedCountsDenom[i] = new double[numRows]; condProbs[i] = new double[numRows][]; // for(int j = 0; j < bayesImMixed.getNumRows(i); j++) { for (int j = 0; j < estimatedIm.getNumRows(i); j++) { // int numCols = bayesImMixed.getNumColumns(i); int numCols = estimatedIm.getNumColumns(i); estimatedCounts[i][j] = new double[numCols]; condProbs[i][j] = new double[numCols]; } } }
private void createDiscreteTimeSeriesData() { // GIVEN: Continuous data set D, maximum lag m. Node[] dataVars = dataSet.getVariables().toArray(new Node[0]); int n = dataVars.length; int m = getNumLags(); // LetXi, i = 0,...,n-1, be the variables from the data. Let Xi(t) be // the variable Xi at time lag t (before 0), t = 0,...,m. Node[][] laggedVars = new Node[m + 1][n]; Knowledge knowledge = new Knowledge(); for (int s = 0; s <= m; s++) { for (int j = 0; j < n; j++) { String name1 = dataVars[j].getName(); String name2 = name1 + "." + (s + 1); laggedVars[s][j] = new DiscreteVariable((DiscreteVariable) dataVars[j]); laggedVars[s][j].setName(name2); laggedVars[s][j].setCenter(80 * j + 50, 80 * (m - s) + 50); knowledge.addToTier(s, laggedVars[s][j].getName()); } } // 2. Prepare the data the way you did. List<Node> variables = new LinkedList<Node>(); for (int s = 0; s <= m; s++) { for (int i = 0; i < n; i++) { int[] rawData = new int[dataSet.getNumRows()]; for (int j = 0; j < dataSet.getNumRows(); j++) { rawData[j] = dataSet.getInt(j, i); } int size = dataSet.getNumRows(); int[] laggedRaw = new int[size - m + 1]; System.arraycopy(rawData, m - s, laggedRaw, 0, size - m + 1); variables.add(laggedVars[s][i]); } } DataSet _laggedData = new ColtDataSet(dataSet.getNumRows() - m + 1, variables); for (int s = 0; s <= m; s++) { for (int i = 0; i < n; i++) { int[] rawData = new int[dataSet.getNumRows()]; for (int j = 0; j < dataSet.getNumRows(); j++) { rawData[j] = dataSet.getInt(j, i); } int size = dataSet.getNumRows(); int[] laggedRaw = new int[size - m + 1]; System.arraycopy(rawData, m - s, laggedRaw, 0, size - m + 1); int _col = _laggedData.getColumn(laggedVars[s][i]); for (int j = 0; j < dataSet.getNumRows(); j++) { _laggedData.setInt(j, _col, laggedRaw[j]); } } } knowledge.setDefaultToKnowledgeLayout(true); _laggedData.setKnowledge(knowledge); DataModelList list = new DataModelList(); list.add(_laggedData); getDataEditor().reset(list); getDataEditor().selectLastTab(); }
public final DataSet filter(DataSet dataSet) { // Why does it have to be discrete? Why can't we simply expand // whatever discrete columns are there and leave the continuous // ones untouched? jdramsey 7/4/2005 // if (!(dataSet.isDiscrete())) { // throw new IllegalArgumentException("Data set must be discrete."); // } List<Node> variables = new LinkedList<>(); // Add all of the variables to the new data set. for (int j = 0; j < dataSet.getNumColumns(); j++) { Node _var = dataSet.getVariable(j); if (!(_var instanceof DiscreteVariable)) { variables.add(_var); continue; } DiscreteVariable variable = (DiscreteVariable) _var; String oldName = variable.getName(); List<String> oldCategories = variable.getCategories(); List<String> newCategories = new LinkedList<>(oldCategories); String newCategory = "Missing"; int _j = 0; while (oldCategories.contains(newCategory)) { newCategory = "Missing" + (++_j); } newCategories.add(newCategory); String newName = oldName + "+"; DiscreteVariable newVariable = new DiscreteVariable(newName, newCategories); variables.add(newVariable); } DataSet newDataSet = new ColtDataSet(dataSet.getNumRows(), variables); // Copy old values to new data set, replacing missing values with new // "MissingValue" categories. for (int j = 0; j < dataSet.getNumColumns(); j++) { Node _var = dataSet.getVariable(j); if (_var instanceof ContinuousVariable) { for (int i = 0; i < dataSet.getNumRows(); i++) { newDataSet.setDouble(i, j, dataSet.getDouble(i, j)); } } else if (_var instanceof DiscreteVariable) { DiscreteVariable variable = (DiscreteVariable) _var; int numCategories = variable.getNumCategories(); for (int i = 0; i < dataSet.getNumRows(); i++) { int value = dataSet.getInt(i, j); if (value == DiscreteVariable.MISSING_VALUE) { newDataSet.setInt(i, j, numCategories); } else { newDataSet.setInt(i, j, value); } } } } return newDataSet; }