/** Train L2 tree boost. */ private void train2(Attribute[] attributes, double[][] x, int[] y) { int n = x.length; int N = (int) Math.round(n * f); int[] y2 = new int[n]; for (int i = 0; i < n; i++) { if (y[i] == 1) { y2[i] = 1; } else { y2[i] = -1; } } y = y2; double[] h = new double[n]; // current F(x_i) double[] response = new double[n]; // response variable for regression tree. double mu = Math.mean(y); b = 0.5 * Math.log((1 + mu) / (1 - mu)); for (int i = 0; i < n; i++) { h[i] = b; } int[][] order = SmileUtils.sort(attributes, x); RegressionTree.NodeOutput output = new L2NodeOutput(response); trees = new RegressionTree[T]; int[] perm = new int[n]; int[] samples = new int[n]; for (int i = 0; i < n; i++) { perm[i] = i; } for (int m = 0; m < T; m++) { Arrays.fill(samples, 0); Math.permutate(perm); for (int i = 0; i < N; i++) { samples[perm[i]] = 1; } for (int i = 0; i < n; i++) { response[i] = 2.0 * y[i] / (1 + Math.exp(2 * y[i] * h[i])); } trees[m] = new RegressionTree(attributes, x, response, J, order, samples, output); for (int i = 0; i < n; i++) { h[i] += shrinkage * trees[m].predict(x[i]); } } }
/**
 * Standard EM algorithm which iteratively alternates Expectation and Maximization steps until
 * convergence.
 *
 * <p>Each iteration computes the posterior responsibility of every component for every
 * sample, optionally adjusts it with the regularized-EM term
 * {@code p * (1 + gamma * log2(p))}, re-estimates every component, and renormalizes the
 * priors. A new configuration is accepted only if it strictly increases the log-likelihood;
 * otherwise the loop stops and the last accepted configuration is kept.
 *
 * <p>Samples whose total mixture density is not positive are left out of the log-likelihood
 * sums and receive zero responsibility from every component.
 *
 * @param components the initial configuration. The list is modified in place: it is cleared
 *     and refilled with each accepted configuration. Each component's distribution is cast
 *     to {@code MultivariateExponentialFamily} for the maximization step.
 * @param x the input data.
 * @param gamma the regularization parameter, in the range [0, 0.2]; 0 disables the
 *     regularization adjustment.
 * @param maxIter the maximum number of iterations.
 * @return the log-likelihood of the configuration left in {@code components}.
 * @throws IllegalArgumentException if {@code x.length < components.size() / 2} or if
 *     {@code gamma} is outside [0, 0.2].
 */
double EM(List<Component> components, double[][] x, double gamma, int maxIter) {
    if (x.length < components.size() / 2) {
        throw new IllegalArgumentException("Too many components");
    }

    if (gamma < 0.0 || gamma > 0.2) {
        throw new IllegalArgumentException("Invalid regularization factor gamma.");
    }

    int n = x.length;
    int m = components.size();

    // posteriori[i][j] is the responsibility of component i for sample j.
    double[][] posteriori = new double[m][n];

    // Log-likelihood of the initial configuration.
    double L = 0.0;
    for (double[] xi : x) {
        double p = 0.0;
        for (Component c : components) {
            p += c.priori * c.distribution.p(xi);
        }
        if (p > 0) {
            L += Math.log(p);
        }
    }

    // EM loop until convergence
    for (int iter = 0; iter < maxIter; iter++) {
        // Expectation step: unnormalized responsibilities.
        for (int i = 0; i < m; i++) {
            Component c = components.get(i);
            for (int j = 0; j < n; j++) {
                posteriori[i][j] = c.priori * c.distribution.p(x[j]);
            }
        }

        // Normalize posteriori probability.
        for (int j = 0; j < n; j++) {
            double p = 0.0;
            for (int i = 0; i < m; i++) {
                p += posteriori[i][j];
            }

            if (!(p > 0.0)) {
                // No component explains this sample (total is zero or NaN). Dividing would
                // produce NaN responsibilities which, when gamma == 0, would reach the
                // maximization step and corrupt the new components. Give the sample zero
                // weight instead, which is also what the regularized branch ends up with.
                for (int i = 0; i < m; i++) {
                    posteriori[i][j] = 0.0;
                }
                continue;
            }

            for (int i = 0; i < m; i++) {
                posteriori[i][j] /= p;
            }

            // Adjust posterior probabilities based on Regularized EM algorithm.
            if (gamma > 0) {
                for (int i = 0; i < m; i++) {
                    posteriori[i][j] *= (1 + gamma * Math.log2(posteriori[i][j]));
                    // log2(0) makes the product NaN, and a strong penalty can push it
                    // below zero; both cases are clamped to zero.
                    if (Double.isNaN(posteriori[i][j]) || posteriori[i][j] < 0.0) {
                        posteriori[i][j] = 0.0;
                    }
                }
            }
        }

        // Maximization step: re-estimate every component from its responsibilities.
        List<Component> newConfig = new ArrayList<Component>(m);
        for (int i = 0; i < m; i++) {
            newConfig.add(
                ((MultivariateExponentialFamily) components.get(i).distribution).M(x, posteriori[i]));
        }

        // Renormalize the priors so that they sum to one.
        double sumAlpha = 0.0;
        for (int i = 0; i < m; i++) {
            sumAlpha += newConfig.get(i).priori;
        }

        for (int i = 0; i < m; i++) {
            newConfig.get(i).priori /= sumAlpha;
        }

        // Log-likelihood of the candidate configuration.
        double newL = 0.0;
        for (double[] xi : x) {
            double p = 0.0;
            for (Component c : newConfig) {
                p += c.priori * c.distribution.p(xi);
            }
            if (p > 0) {
                newL += Math.log(p);
            }
        }

        // Accept only strict improvements; otherwise keep the previous configuration.
        if (newL > L) {
            L = newL;
            components.clear();
            components.addAll(newConfig);
        } else {
            break;
        }
    }

    return L;
}