/**
 * Fits the mixture (or mixing) distribution to the data. The data is not pre-clustered for
 * computational efficiency.
 *
 * @param data the data supposedly generated from the mixture
 * @param method the method to be used. Refer to the static final variables of this class.
 * @return the generated distribution
 */
public DiscreteFunction fitForSingleCluster(DoubleVector data, int method) {
    // Degenerate case: with fewer than two points, the data itself defines the distribution.
    if (data.size() < 2) {
        return new DiscreteFunction(data);
    }

    // Build the support points, the fitting intervals, and the two probability matrices
    // (model probabilities over the support, and the normalized empirical probabilities).
    DoubleVector support = supportPoints(data, 0);
    PaceMatrix intervals = fittingIntervals(data);
    PaceMatrix probabilities = probabilityMatrix(support, intervals);
    PaceMatrix empirical =
        new PaceMatrix(empiricalProbability(data, intervals).timesEquals(1. / data.size()));
    IntVector pivot = (IntVector) IntVector.seq(0, support.size() - 1);

    // Solve for the mixing weights with the requested estimator.
    DoubleVector weights;
    if (method == NNMMethod) {
        weights = probabilities.nnls(empirical, pivot);
    } else if (method == PMMethod) {
        weights = probabilities.nnlse1(empirical, pivot);
    } else {
        throw new IllegalArgumentException("unknown method");
    }

    // Gather the support points selected by the pivoting of the solver.
    DoubleVector selected = new DoubleVector(pivot.size());
    for (int k = 0; k < selected.size(); k++) {
        selected.set(k, support.get(pivot.get(k)));
    }

    // Assemble, order, and normalize the resulting discrete distribution.
    DiscreteFunction distribution = new DiscreteFunction(selected, weights);
    distribution.sort();
    distribution.normalize();
    return distribution;
}
/*
 * Recover the maximizing variables going back through the
 * maximizing bucket_tree; the variables are returned as an array
 * of markers (non-explanation variables get INVALID_INDEX).
 */
private int[] backward_maximization() {
    int i, j;
    int bi = bucket_tree.length - 1;
    DiscreteFunction back_df;
    Bucket b = bucket_tree[bi];

    // If there are no explanation variables in the BayesNet, return null
    if (b.backward_pointers == null)
        return (null);

    // Initialize the markers for backward pointers with INVALID_INDEX
    int backward_markers[] = new int[bn.number_variables()];
    for (i = 0; i < backward_markers.length; i++)
        backward_markers[i] = BayesNet.INVALID_INDEX;

    // Initialize the marker for the last bucket.
    // The stored values are doubles holding integer indexes; "+ 0.5" with
    // an int cast rounds them to the nearest integer.
    backward_markers[b.variable.get_index()] =
        (int) (b.backward_pointers.get_value(0) + 0.5);

    // Go backwards through the bucket_tree.
    // NOTE(review): the loop stops at the first non-explanation bucket,
    // which assumes explanation buckets form a contiguous suffix of the
    // ordering — confirm against how the bucket_tree is constructed.
    for (i = (bi - 1); i >= 0; i--) {
        if (!bucket_tree[i].is_explanation())
            break;
        back_df = bucket_tree[i].backward_pointers;

        // Skip null pointers (caused by evidence)
        if (back_df == null)
            continue;

        // Special treatment for bucket with only one value,
        // since it can be a bucket with only the bucket variable left
        if (back_df.number_values() == 1) {
            backward_markers[bucket_tree[i].variable.get_index()] =
                (int) (back_df.get_value(0) + 0.5);
            continue;
        }

        // Process the bucket: locate the entry selected by the markers
        // fixed so far, then record this bucket's maximizing value.
        j = back_df.get_position_from_indexes(bn.get_probability_variables(),
            backward_markers);
        backward_markers[bucket_tree[i].variable.get_index()] =
            (int) (back_df.get_value(j) + 0.5);
    }

    return (backward_markers);
}
/* * Put a DiscreteFunction into the BucketTree beyond the current * active_bucket. If was_first_variable_cancelled_by_evidence is true, * then mark the bucket accordingly. */ private void insert(DiscreteFunction df, boolean was_first_variable_cancelled_by_evidence) { int i, index; Bucket b; for (i = active_bucket; i < bucket_tree.length; i++) { index = bucket_tree[i].variable.get_index(); if (df.memberOf(index)) { bucket_tree[i].discrete_functions.addElement(df); // If the function is a ProbabilityFunction, store its // first variable appropriately (assuming for now that // the first variable is the only possible non-conditioning variable). if ((df instanceof ProbabilityFunction) && (!was_first_variable_cancelled_by_evidence)) { bucket_tree[i].non_conditioning_variables.addElement(df.get_variable(0)); } return; } } }
/**
 * Fits the mixture (or mixing) distribution to the data.
 *
 * @param data the data supposedly generated from the mixture
 * @param method the method to be used. Refer to the static final variables of this class.
 */
public void fit(DoubleVector data, int method) {
    // Work on a sorted copy so the caller's data is left untouched.
    DoubleVector sorted = (DoubleVector) data.clone();
    if (sorted.unsorted()) {
        sorted.sort();
    }

    int n = sorted.size();
    int clusterStart = 0;
    DiscreteFunction mixture = new DiscreteFunction();

    // Split the sorted data into separable clusters and fit each one
    // independently, weighting its contribution by the cluster size.
    for (int i = 0; i < n - 1; i++) {
        if (separable(sorted, clusterStart, i, sorted.get(i + 1))
            && separable(sorted, i + 1, n - 1, sorted.get(i))) {
            DoubleVector cluster = (DoubleVector) sorted.subvector(clusterStart, i);
            mixture.plusEquals(
                fitForSingleCluster(cluster, method).timesEquals(i - clusterStart + 1));
            clusterStart = i + 1;
        }
    }

    // Fit the final (possibly the only) cluster.
    DoubleVector lastCluster = (DoubleVector) sorted.subvector(clusterStart, n - 1);
    mixture.plusEquals(fitForSingleCluster(lastCluster, method).timesEquals(n - clusterStart));

    mixture.sort();
    mixture.normalize();
    mixingDistribution = mixture;
}
/**
 * Converts to a string.
 *
 * <p>Assumes {@code fit} has already been called so that
 * {@code mixingDistribution} is non-null.
 *
 * @return a string representation of the mixing distribution
 */
@Override
public String toString() {
    return "The mixing distribution:\n" + mixingDistribution.toString();
}
/** Print method for BucketTree. */
public void print(PrintStream out) {
    // Header with the current position in the bucket ordering.
    out.println("BucketTree:\n\tActive Bucket is " + active_bucket + ".");
    // Dump every bucket in order, then the (unnormalized) result.
    for (int i = 0; i < bucket_tree.length; i++) {
        bucket_tree[i].print(out);
    }
    out.println("Bucket result: ");
    unnormalized_result.print(out);
}