/**
 * Looks up the position of a category within a named discrete variable.
 *
 * <p>The variable is located by finding {@code nodeName} in the variable
 * source's list of variable names and taking the variable at that same
 * position in the source's variable list.
 *
 * @param nodeName the name of the variable to look up. If the name is not
 *                 among the source's variable names, the lookup index is -1
 *                 and fetching the variable fails with an out-of-bounds
 *                 exception from the underlying list.
 * @param category the category whose index is requested.
 * @return the index of {@code category} in the variable's category list, or
 * -1 if the variable has no such category.
 * @throws ClassCastException if the variable found is not a
 *                            {@code DiscreteVariable}.
 */
public int getCategoryIndex(String nodeName, String category) {
    // Names and variables are assumed to be parallel lists, so the position
    // of the name is used directly as the position of the variable.
    final int position = getVariableSource().getVariableNames().indexOf(nodeName);

    final DiscreteVariable discrete =
            (DiscreteVariable) getVariableSource().getVariables().get(position);

    final List<String> categories = discrete.getCategories();
    return categories.indexOf(category);
}
public final DataSet filter(DataSet dataSet) { // Why does it have to be discrete? Why can't we simply expand // whatever discrete columns are there and leave the continuous // ones untouched? jdramsey 7/4/2005 // if (!(dataSet.isDiscrete())) { // throw new IllegalArgumentException("Data set must be discrete."); // } List<Node> variables = new LinkedList<>(); // Add all of the variables to the new data set. for (int j = 0; j < dataSet.getNumColumns(); j++) { Node _var = dataSet.getVariable(j); if (!(_var instanceof DiscreteVariable)) { variables.add(_var); continue; } DiscreteVariable variable = (DiscreteVariable) _var; String oldName = variable.getName(); List<String> oldCategories = variable.getCategories(); List<String> newCategories = new LinkedList<>(oldCategories); String newCategory = "Missing"; int _j = 0; while (oldCategories.contains(newCategory)) { newCategory = "Missing" + (++_j); } newCategories.add(newCategory); String newName = oldName + "+"; DiscreteVariable newVariable = new DiscreteVariable(newName, newCategories); variables.add(newVariable); } DataSet newDataSet = new ColtDataSet(dataSet.getNumRows(), variables); // Copy old values to new data set, replacing missing values with new // "MissingValue" categories. for (int j = 0; j < dataSet.getNumColumns(); j++) { Node _var = dataSet.getVariable(j); if (_var instanceof ContinuousVariable) { for (int i = 0; i < dataSet.getNumRows(); i++) { newDataSet.setDouble(i, j, dataSet.getDouble(i, j)); } } else if (_var instanceof DiscreteVariable) { DiscreteVariable variable = (DiscreteVariable) _var; int numCategories = variable.getNumCategories(); for (int i = 0; i < dataSet.getNumRows(); i++) { int value = dataSet.getInt(i, j); if (value == DiscreteVariable.MISSING_VALUE) { newDataSet.setInt(i, j, numCategories); } else { newDataSet.setInt(i, j, value); } } } } return newDataSet; }