Exemplo n.º 1
0
  /**
   * Fits the mixing distribution to data that is treated as one single cluster; the data is
   * not pre-clustered by this method.
   *
   * <p>Data holding fewer than two points is wrapped directly in a {@code DiscreteFunction}.
   * Otherwise the weights of the candidate support points are computed with {@code nnls} or
   * {@code nnlse1}, depending on {@code method}, and the resulting function is sorted and
   * normalized before it is returned.
   *
   * @param data the data supposedly generated from the mixture
   * @param method the fitting method; either {@code NNMMethod} or {@code PMMethod}
   * @return the fitted distribution
   * @throws IllegalArgumentException if {@code method} is neither of the two constants above
   */
  public DiscreteFunction fitForSingleCluster(DoubleVector data, int method) {
    if (data.size() < 2) {
      return new DiscreteFunction(data);
    }

    DoubleVector candidates = supportPoints(data, 0);
    PaceMatrix intervals = fittingIntervals(data);
    PaceMatrix probabilities = probabilityMatrix(candidates, intervals);
    // Empirical probabilities scaled by 1 / (number of data points).
    PaceMatrix empirical =
        new PaceMatrix(empiricalProbability(data, intervals).timesEquals(1. / data.size()));

    // One pivot entry per candidate support point: 0 .. candidates.size() - 1.
    IntVector pivots = (IntVector) IntVector.seq(0, candidates.size() - 1);

    DoubleVector weights;
    if (method == NNMMethod) {
      weights = probabilities.nnls(empirical, pivots);
    } else if (method == PMMethod) {
      weights = probabilities.nnlse1(empirical, pivots);
    } else {
      throw new IllegalArgumentException("unknown method");
    }

    // Pick the support points in pivot order so that they line up with the weights.
    // NOTE(review): the solver presumably updates the pivot vector in place - confirm.
    DoubleVector selected = new DoubleVector(pivots.size());
    for (int k = 0; k < selected.size(); k++) {
      selected.set(k, candidates.get(pivots.get(k)));
    }

    DiscreteFunction fitted = new DiscreteFunction(selected, weights);
    fitted.sort();
    fitted.normalize();
    return fitted;
  }
Exemplo n.º 2
0
  /*
   * Walk the bucket_tree backwards, starting at its last bucket, and
   * collect the maximizing value of each explanation variable. The
   * returned array is indexed by variable index; entries that are never
   * assigned keep BayesNet.INVALID_INDEX. Returns null when the last
   * bucket has no backward pointers.
   */
  private int[] backward_maximization() {
    final int last = bucket_tree.length - 1;
    final Bucket tail = bucket_tree[last];

    // No backward pointers in the last bucket: nothing to recover
    if (tail.backward_pointers == null) {
      return null;
    }

    // Every marker starts out as INVALID_INDEX
    final int[] markers = new int[bn.number_variables()];
    for (int k = 0; k < markers.length; k++) {
      markers[k] = BayesNet.INVALID_INDEX;
    }

    // The last bucket keeps its value at position 0; adding 0.5 before
    // the cast rounds the stored value to the nearest integer
    markers[tail.variable.get_index()] = (int) (tail.backward_pointers.get_value(0) + 0.5);

    // Visit the earlier buckets in reverse order, stopping at the first
    // bucket that is not an explanation bucket
    for (int pos = last - 1; pos >= 0 && bucket_tree[pos].is_explanation(); pos--) {
      final Bucket current = bucket_tree[pos];
      final DiscreteFunction pointers = current.backward_pointers;

      // Null pointers (caused by evidence) are skipped
      if (pointers == null) {
        continue;
      }

      // A function with a single value may contain only the bucket
      // variable, so position 0 is read directly; otherwise the position
      // is looked up from the markers collected so far
      final int position;
      if (pointers.number_values() == 1) {
        position = 0;
      } else {
        position = pointers.get_position_from_indexes(bn.get_probability_variables(), markers);
      }
      markers[current.variable.get_index()] = (int) (pointers.get_value(position) + 0.5);
    }

    return markers;
  }
Exemplo n.º 3
0
 /*
  * Attach a DiscreteFunction to the first bucket, searching from
  * active_bucket to the end of the bucket_tree, whose variable is a
  * member of the function. Nothing is stored when no such bucket exists.
  * If the function is a ProbabilityFunction and
  * was_first_variable_cancelled_by_evidence is false, the function's
  * first variable is also added to that bucket's non-conditioning
  * variables.
  */
 private void insert(DiscreteFunction df, boolean was_first_variable_cancelled_by_evidence) {
   // Assumes for now that the first variable of a ProbabilityFunction is
   // the only possible non-conditioning variable.
   final boolean records_first_variable =
       (df instanceof ProbabilityFunction) && !was_first_variable_cancelled_by_evidence;

   for (int pos = active_bucket; pos < bucket_tree.length; pos++) {
     final Bucket candidate = bucket_tree[pos];
     if (!df.memberOf(candidate.variable.get_index())) {
       continue;
     }

     candidate.discrete_functions.addElement(df);
     if (records_first_variable) {
       candidate.non_conditioning_variables.addElement(df.get_variable(0));
     }
     // Only the first matching bucket receives the function
     return;
   }
 }
Exemplo n.º 4
0
  /**
   * Fits the mixture (or mixing) distribution to the data.
   *
   * <p>A sorted clone of the data is split into clusters wherever {@code separable} accepts
   * both sides of a cut. Every cluster is fitted with {@link #fitForSingleCluster}, scaled by
   * its number of points and accumulated; the sum is then sorted, normalized and stored as the
   * mixing distribution.
   *
   * @param data the data supposedly generated from the mixture
   * @param method the method to be used. Refer to the static final variables of this class.
   */
  public void fit(DoubleVector data, int method) {
    // Sort a clone rather than the argument itself.
    DoubleVector sorted = (DoubleVector) data.clone();
    if (sorted.unsorted()) {
      sorted.sort();
    }

    final int last = sorted.size() - 1;
    DiscreteFunction mixture = new DiscreteFunction();
    int clusterStart = 0;

    for (int cut = 0; cut < last; cut++) {
      // A cut between positions cut and cut + 1 is kept only if both sides pass the check.
      if (!separable(sorted, clusterStart, cut, sorted.get(cut + 1))
          || !separable(sorted, cut + 1, last, sorted.get(cut))) {
        continue;
      }

      DoubleVector cluster = (DoubleVector) sorted.subvector(clusterStart, cut);
      int clusterSize = cut - clusterStart + 1;
      mixture.plusEquals(fitForSingleCluster(cluster, method).timesEquals(clusterSize));
      clusterStart = cut + 1;
    }

    // Whatever remains after the final cut forms the last cluster.
    DoubleVector remainder = (DoubleVector) sorted.subvector(clusterStart, last);
    int remainderSize = last - clusterStart + 1;
    mixture.plusEquals(fitForSingleCluster(remainder, method).timesEquals(remainderSize));

    mixture.sort();
    mixture.normalize();
    mixingDistribution = mixture;
  }
Exemplo n.º 5
0
 /**
  * Converts to a string.
  *
  * <p>The mixing distribution is appended through string concatenation, so an instance whose
  * distribution has not been assigned yet yields the text {@code null} after the header
  * instead of throwing a {@code NullPointerException}.
  *
  * @return a string representation
  */
 @Override
 public String toString() {
   // Concatenation converts a null reference to "null"; an explicit
   // mixingDistribution.toString() call would throw on an unfitted instance.
   return "The mixing distribution:\n" + mixingDistribution;
 }
Exemplo n.º 6
0
 /**
  * Print method for BucketTree: writes a header naming the active bucket, then every bucket
  * in order, and finally the unnormalized result.
  *
  * @param out the stream to print to
  */
 public void print(PrintStream out) {
   final String header = "BucketTree:" + "\n\tActive Bucket is " + active_bucket + ".";
   out.println(header);

   int pos = 0;
   while (pos < bucket_tree.length) {
     bucket_tree[pos].print(out);
     pos++;
   }

   out.println("Bucket result: ");
   unnormalized_result.print(out);
 }