/**
 * Builds a BucketTree from an Ordering. All set-up happens here, and this is
 * meant to be the only place where variables are looked up by their symbolic
 * (string) names; everything else works with indices.
 *
 * @param ord the Ordering supplying the network ({@code ord.bn}), the
 *            explanation status and the array of variable names in
 *            processing order
 * @param dpc stored in {@code do_produce_clusters} and forwarded to every
 *            Bucket created here
 */
public BucketTree(Ordering ord, boolean dpc) {
    do_produce_clusters = dpc;
    ordering = ord;

    // Copy what is needed out of the Ordering object.
    bn = ord.bn;
    explanation_status = ord.explanation_status;
    String[] order = ord.order;

    // Processing starts at the first bucket.
    active_bucket = 0;

    // The last name in the order is looked up first: if that variable is
    // observed, the tree collapses to a single bucket holding one function.
    ProbabilityVariable lastVariable =
            bn.get_probability_variable(bn.index_of_variable(order[order.length - 1]));
    if (lastVariable.is_observed()) {
        ProbabilityFunction observedFunction = transform_to_probability_function(bn, lastVariable);
        bucket_tree = new Bucket[1];
        bucket_tree[0] = new Bucket(this, lastVariable, do_produce_clusters);
        insert(observedFunction);
        return;
    }

    // General case: one bucket per variable, in the order given.
    bucket_tree = new Bucket[order.length];
    for (int position = 0; position < order.length; position++) {
        int variableIndex = bn.index_of_variable(order[position]);
        bucket_tree[position] =
                new Bucket(this, bn.get_probability_variable(variableIndex), do_produce_clusters);
    }

    // Flag, by network index, every variable that appears in the order.
    boolean[] inOrder = new boolean[bn.number_variables()];
    for (String variableName : order) {
        inOrder[bn.index_of_variable(variableName)] = true;
    }

    // Insert each probability function whose first variable is flagged and
    // for which check_evidence yields a non-null function.
    for (int f = 0; f < bn.number_probability_functions(); f++) {
        if (!inOrder[bn.get_probability_function(f).get_index(0)]) {
            continue;
        }
        ProbabilityFunction checked = check_evidence(bn.get_probability_function(f));
        if (checked == null) {
            continue;
        }
        // The flag passed to insert is true exactly when the checked function
        // does not contain the first variable of the original function.
        DiscreteVariable firstVariable = (bn.get_probability_function(f)).get_variable(0);
        insert(checked, !checked.memberOf(firstVariable.get_index()));
    }

    // Finally add the utility function, when the network has one.
    DiscreteFunction utility = bn.get_utility_function();
    if (utility != null) {
        insert(utility);
    }
}
public final DataSet filter(DataSet dataSet) { // Why does it have to be discrete? Why can't we simply expand // whatever discrete columns are there and leave the continuous // ones untouched? jdramsey 7/4/2005 // if (!(dataSet.isDiscrete())) { // throw new IllegalArgumentException("Data set must be discrete."); // } List<Node> variables = new LinkedList<>(); // Add all of the variables to the new data set. for (int j = 0; j < dataSet.getNumColumns(); j++) { Node _var = dataSet.getVariable(j); if (!(_var instanceof DiscreteVariable)) { variables.add(_var); continue; } DiscreteVariable variable = (DiscreteVariable) _var; String oldName = variable.getName(); List<String> oldCategories = variable.getCategories(); List<String> newCategories = new LinkedList<>(oldCategories); String newCategory = "Missing"; int _j = 0; while (oldCategories.contains(newCategory)) { newCategory = "Missing" + (++_j); } newCategories.add(newCategory); String newName = oldName + "+"; DiscreteVariable newVariable = new DiscreteVariable(newName, newCategories); variables.add(newVariable); } DataSet newDataSet = new ColtDataSet(dataSet.getNumRows(), variables); // Copy old values to new data set, replacing missing values with new // "MissingValue" categories. for (int j = 0; j < dataSet.getNumColumns(); j++) { Node _var = dataSet.getVariable(j); if (_var instanceof ContinuousVariable) { for (int i = 0; i < dataSet.getNumRows(); i++) { newDataSet.setDouble(i, j, dataSet.getDouble(i, j)); } } else if (_var instanceof DiscreteVariable) { DiscreteVariable variable = (DiscreteVariable) _var; int numCategories = variable.getNumCategories(); for (int i = 0; i < dataSet.getNumRows(); i++) { int value = dataSet.getInt(i, j); if (value == DiscreteVariable.MISSING_VALUE) { newDataSet.setInt(i, j, numCategories); } else { newDataSet.setInt(i, j, value); } } } } return newDataSet; }