Beispiel #1
0
  /**
   * Train L2 tree boost for the two-class problem.
   *
   * <p>Labels equal to 1 are kept as +1 and every other label is recoded as -1. The intercept
   * {@code b} is initialized to {@code 0.5 * log((1 + mu) / (1 - mu))}, where {@code mu} is the
   * mean of the recoded labels. Each of the {@code T} rounds then marks {@code round(n * f)}
   * randomly chosen instances as sampled, recomputes the response
   * {@code 2y / (1 + exp(2yF(x)))}, fits a regression tree to it, and adds the tree's prediction
   * scaled by {@code shrinkage} to the running score {@code F(x)}.
   *
   * @param attributes the attribute descriptors, passed to the sorter and the tree builder.
   * @param x the training instances.
   * @param y the class labels; only the value 1 is treated as the positive class.
   */
  private void train2(Attribute[] attributes, double[][] x, int[] y) {
    final int n = x.length;
    final int subsampleSize = (int) Math.round(n * f);

    // Recode the labels into {-1, +1}; the caller's array is left untouched.
    int[] labels = new int[n];
    for (int i = 0; i < n; i++) {
      labels[i] = (y[i] == 1) ? 1 : -1;
    }

    double[] score = new double[n]; // current F(x_i)
    double[] response = new double[n]; // regression target, rewritten in place every round

    double mu = Math.mean(labels);
    b = 0.5 * Math.log((1 + mu) / (1 - mu));
    Arrays.fill(score, b);

    int[][] order = SmileUtils.sort(attributes, x);
    // The node output keeps a reference to the response array, so it must never be reallocated.
    RegressionTree.NodeOutput output = new L2NodeOutput(response);
    trees = new RegressionTree[T];

    int[] perm = new int[n];
    for (int i = 0; i < n; i++) {
      perm[i] = i;
    }
    int[] inBag = new int[n];

    for (int round = 0; round < T; round++) {
      // Draw a fresh subsample without replacement: the first entries of a random permutation.
      Arrays.fill(inBag, 0);
      Math.permutate(perm);
      for (int k = 0; k < subsampleSize; k++) {
        inBag[perm[k]] = 1;
      }

      // Response for the current scores.
      for (int i = 0; i < n; i++) {
        response[i] = 2.0 * labels[i] / (1 + Math.exp(2 * labels[i] * score[i]));
      }

      RegressionTree tree = new RegressionTree(attributes, x, response, J, order, inBag, output);
      trees[round] = tree;

      // Update the scores of all instances, sampled or not.
      for (int i = 0; i < n; i++) {
        score[i] += shrinkage * tree.predict(x[i]);
      }
    }
  }
  /**
   * Standard EM algorithm which iteratively alternates Expectation and Maximization steps until
   * convergence.
   *
   * <p>Iteration stops as soon as an M step fails to strictly increase the log-likelihood, or
   * after {@code maxIter} iterations. Whenever an iteration improves the log-likelihood, the
   * contents of {@code components} are replaced in place with the re-estimated components.
   *
   * <p>A sample whose total mixture density is exactly zero is given zero responsibility for
   * every component instead of the NaN that {@code 0 / 0} would produce. Such samples are also
   * skipped when the log-likelihood is accumulated.
   *
   * @param components the initial configuration; updated in place with the fitted components.
   * @param x the input data.
   * @param gamma the regularization parameter, in the range [0, 0.2].
   * @param maxIter the maximum number of iterations.
   * @return log Likelihood
   * @throws IllegalArgumentException if there are too many components for the amount of data, or
   *     if {@code gamma} is outside [0, 0.2].
   */
  double EM(List<Component> components, double[][] x, double gamma, int maxIter) {
    if (x.length < components.size() / 2) {
      throw new IllegalArgumentException("Too many components");
    }

    if (gamma < 0.0 || gamma > 0.2) {
      throw new IllegalArgumentException("Invalid regularization factor gamma.");
    }

    int n = x.length;
    int m = components.size();

    double[][] posteriori = new double[m][n];

    // Log Likelihood of the initial configuration.
    double L = 0.0;
    for (double[] xi : x) {
      double p = 0.0;
      for (Component c : components) {
        p += c.priori * c.distribution.p(xi);
      }
      if (p > 0) {
        L += Math.log(p);
      }
    }

    // EM loop until convergence
    for (int iter = 0; iter < maxIter; iter++) {

      // Expectation step: unnormalized responsibility of component i for sample j.
      for (int i = 0; i < m; i++) {
        Component c = components.get(i);

        for (int j = 0; j < n; j++) {
          posteriori[i][j] = c.priori * c.distribution.p(x[j]);
        }
      }

      // Normalize posteriori probability.
      for (int j = 0; j < n; j++) {
        double p = 0.0;

        for (int i = 0; i < m; i++) {
          p += posteriori[i][j];
        }

        if (p == 0.0) {
          // No component gives this sample any density. Dividing would yield 0/0 = NaN, which
          // (when gamma == 0) would leak into the M step and corrupt the estimates. Assign zero
          // responsibility instead, matching what the regularized branch already ends up with.
          for (int i = 0; i < m; i++) {
            posteriori[i][j] = 0.0;
          }
          continue;
        }

        for (int i = 0; i < m; i++) {
          posteriori[i][j] /= p;
        }

        // Adjust posterior probabilities based on Regularized EM algorithm.
        if (gamma > 0) {
          for (int i = 0; i < m; i++) {
            posteriori[i][j] *= (1 + gamma * Math.log2(posteriori[i][j]));
            // A zero posterior gives 0 * -Infinity = NaN, and a tiny one may go negative.
            if (Double.isNaN(posteriori[i][j]) || posteriori[i][j] < 0.0) {
              posteriori[i][j] = 0.0;
            }
          }
        }
      }

      // Maximization step: re-estimate every component from its responsibilities.
      List<Component> newConfig = new ArrayList<Component>(m);
      for (int i = 0; i < m; i++) {
        newConfig.add(
            ((MultivariateExponentialFamily) components.get(i).distribution).M(x, posteriori[i]));
      }

      // Renormalize the mixing weights so that they sum to one.
      double sumAlpha = 0.0;
      for (int i = 0; i < m; i++) {
        sumAlpha += newConfig.get(i).priori;
      }

      for (int i = 0; i < m; i++) {
        newConfig.get(i).priori /= sumAlpha;
      }

      double newL = 0.0;
      for (double[] xi : x) {
        double p = 0.0;
        for (Component c : newConfig) {
          p += c.priori * c.distribution.p(xi);
        }
        if (p > 0) {
          newL += Math.log(p);
        }
      }

      // Accept the new configuration only if it strictly improves the likelihood.
      if (newL > L) {
        L = newL;
        components.clear();
        components.addAll(newConfig);
      } else {
        break;
      }
    }

    return L;
  }