Exemplo n.º 1
0
  /**
   * Estimates the mixture (or mixing) distribution from data that is treated as one single
   * cluster. No pre-clustering of the data is attempted here, for computational efficiency.
   *
   * <p>Data holding fewer than two points is wrapped directly in a {@link DiscreteFunction}.
   * Otherwise the support points, fitting intervals, probability matrix and empirical
   * probabilities (passed through {@code timesEquals(1. / data.size())}) are computed, the solver
   * selected by {@code method} is run, and the support points addressed by the pivot vector after
   * solving are paired with the resulting weights. The outcome is sorted and normalized before it
   * is returned.
   *
   * @param data the data supposedly generated from the mixture
   * @param method the fitting method; one of the static final method constants of this class
   *     ({@code NNMMethod} or {@code PMMethod} are the values handled here)
   * @return the fitted discrete distribution
   * @throws IllegalArgumentException if {@code data} has at least two points and {@code method}
   *     is not one of the handled constants
   */
  public DiscreteFunction fitForSingleCluster(DoubleVector data, int method) {
    // Too few points to fit anything: hand the data back as a discrete function.
    if (data.size() < 2) {
      return new DiscreteFunction(data);
    }

    DoubleVector supports = supportPoints(data, 0);
    PaceMatrix intervals = fittingIntervals(data);
    PaceMatrix probabilities = probabilityMatrix(supports, intervals);
    PaceMatrix empirical =
        new PaceMatrix(empiricalProbability(data, intervals).timesEquals(1. / data.size()));

    // Pivot vector starts as 0 .. supports.size() - 1 and is handed to the solver.
    IntVector pivots = (IntVector) IntVector.seq(0, supports.size() - 1);

    DoubleVector weights;
    if (method == NNMMethod) {
      weights = probabilities.nnls(empirical, pivots);
    } else if (method == PMMethod) {
      weights = probabilities.nnlse1(empirical, pivots);
    } else {
      throw new IllegalArgumentException("unknown method");
    }

    // Collect the support points the pivot vector refers to once the solver has returned.
    DoubleVector chosen = new DoubleVector(pivots.size());
    for (int k = 0; k < chosen.size(); k++) {
      int supportIndex = pivots.get(k);
      chosen.set(k, supports.get(supportIndex));
    }

    DiscreteFunction fitted = new DiscreteFunction(chosen, weights);
    fitted.sort();
    fitted.normalize();
    return fitted;
  }
Exemplo n.º 2
0
  /**
   * Fits the mixture (or mixing) distribution to the data and stores it in {@code
   * mixingDistribution}.
   *
   * <p>The method works on a clone of {@code data}, sorting the clone when it reports itself as
   * unsorted. It then walks the sorted values and closes the current cluster at every position
   * where both {@code separable} checks succeed. Each cluster is fitted on its own with {@link
   * #fitForSingleCluster(DoubleVector, int)}, multiplied by the number of points it spans via
   * {@code timesEquals}, and accumulated. The accumulated function is finally sorted and
   * normalized.
   *
   * @param data the data supposedly generated from the mixture
   * @param method the method to be used. Refer to the static final variables of this class.
   */
  public void fit(DoubleVector data, int method) {
    DoubleVector sorted = (DoubleVector) data.clone();
    if (sorted.unsorted()) {
      sorted.sort();
    }

    final int total = sorted.size();
    final int lastIndex = total - 1;
    DiscreteFunction accumulated = new DiscreteFunction();
    int clusterStart = 0;

    for (int end = 0; end < lastIndex; end++) {
      // A cut after position `end` needs both sides to pass the separable check
      // against the nearest point on the other side.
      boolean cutHere =
          separable(sorted, clusterStart, end, sorted.get(end + 1))
              && separable(sorted, end + 1, lastIndex, sorted.get(end));
      if (!cutHere) {
        continue;
      }

      DoubleVector cluster = (DoubleVector) sorted.subvector(clusterStart, end);
      DiscreteFunction clusterFit = fitForSingleCluster(cluster, method);
      // Multiplier is the number of indices in clusterStart..end.
      accumulated.plusEquals(clusterFit.timesEquals(end - clusterStart + 1));
      clusterStart = end + 1;
    }

    // Whatever remains after the last cut forms the final cluster.
    DoubleVector tail = (DoubleVector) sorted.subvector(clusterStart, lastIndex);
    DiscreteFunction tailFit = fitForSingleCluster(tail, method);
    accumulated.plusEquals(tailFit.timesEquals(total - clusterStart));

    accumulated.sort();
    accumulated.normalize();
    mixingDistribution = accumulated;
  }