/** Train L2 tree boost. */ private void train2(Attribute[] attributes, double[][] x, int[] y) { int n = x.length; int N = (int) Math.round(n * f); int[] y2 = new int[n]; for (int i = 0; i < n; i++) { if (y[i] == 1) { y2[i] = 1; } else { y2[i] = -1; } } y = y2; double[] h = new double[n]; // current F(x_i) double[] response = new double[n]; // response variable for regression tree. double mu = Math.mean(y); b = 0.5 * Math.log((1 + mu) / (1 - mu)); for (int i = 0; i < n; i++) { h[i] = b; } int[][] order = SmileUtils.sort(attributes, x); RegressionTree.NodeOutput output = new L2NodeOutput(response); trees = new RegressionTree[T]; int[] perm = new int[n]; int[] samples = new int[n]; for (int i = 0; i < n; i++) { perm[i] = i; } for (int m = 0; m < T; m++) { Arrays.fill(samples, 0); Math.permutate(perm); for (int i = 0; i < N; i++) { samples[perm[i]] = 1; } for (int i = 0; i < n; i++) { response[i] = 2.0 * y[i] / (1 + Math.exp(2 * y[i] * h[i])); } trees[m] = new RegressionTree(attributes, x, response, J, order, samples, output); for (int i = 0; i < n; i++) { h[i] += shrinkage * trees[m].predict(x[i]); } } }
/**
 * Renders a textual summary: the distortion, the number of data points, and
 * one row per cluster with its index, its size, and its share of the data as a
 * percentage with one decimal digit.
 *
 * @return the multi-line summary string.
 */
@Override
public String toString() {
    StringBuilder summary = new StringBuilder()
            .append(String.format("CLARANS distortion: %.5f%n", distortion))
            .append(String.format("Clusters of %d data points:%n", y.length));

    for (int cluster = 0; cluster < k; cluster++) {
        // Share in tenths of a percent, so that /10 and %10 yield "xx.x".
        final int permille = (int) Math.round(1000.0 * size[cluster] / y.length);
        final int whole = permille / 10;
        final int tenth = permille % 10;
        summary.append(String.format("%3d\t%5d (%2d.%1d%%)%n", cluster, size[cluster], whole, tenth));
    }

    return summary.toString();
}
/**
 * Convert coordinate to a string of the form {@code (c0,c1,...)}.
 *
 * <p>Each component is passed through {@code Math.round(value, 2)} before being
 * appended. An empty coordinate yields {@code "()"}.
 *
 * <p>NOTE(review): the two-argument {@code Math.round} is not part of
 * {@code java.lang.Math}; it presumably rounds to two decimal places via the
 * project's math utility - confirm against the file's imports.
 *
 * @param c the coordinate components.
 * @return the parenthesized, comma-separated representation.
 */
public static String coordToString(double... c) {
    StringBuilder builder = new StringBuilder("(");
    for (int i = 0; i < c.length; i++) {
        // Emit the separator before every component but the first, so there is
        // no trailing comma to patch afterwards. The previous code overwrote the
        // trailing comma with setCharAt(builder.length(), ')'), which is one past
        // the last valid index and threw IndexOutOfBoundsException on every
        // non-empty input.
        if (i > 0) {
            builder.append(",");
        }
        builder.append(Math.round(c[i], 2));
    }
    builder.append(")");
    return builder.toString();
}
/** Train L-k tree boost. */ private void traink(Attribute[] attributes, double[][] x, int[] y) { int n = x.length; int N = (int) Math.round(n * f); double[][] h = new double[k][n]; // boost tree output. double[][] p = new double[k][n]; // posteriori probabilities. double[][] response = new double[k][n]; // pseudo response. int[][] order = SmileUtils.sort(attributes, x); forest = new RegressionTree[k][T]; RegressionTree.NodeOutput[] output = new LKNodeOutput[k]; for (int i = 0; i < k; i++) { output[i] = new LKNodeOutput(response[i]); } int[] perm = new int[n]; int[] samples = new int[n]; for (int i = 0; i < n; i++) { perm[i] = i; } for (int m = 0; m < T; m++) { for (int i = 0; i < n; i++) { double max = Double.NEGATIVE_INFINITY; for (int j = 0; j < k; j++) { if (max < h[j][i]) { max = h[j][i]; } } double Z = 0.0; for (int j = 0; j < k; j++) { p[j][i] = Math.exp(h[j][i] - max); Z += p[j][i]; } for (int j = 0; j < k; j++) { p[j][i] /= Z; } } for (int j = 0; j < k; j++) { for (int i = 0; i < n; i++) { if (y[i] == j) { response[j][i] = 1.0; } else { response[j][i] = 0.0; } response[j][i] -= p[j][i]; } Arrays.fill(samples, 0); Math.permutate(perm); for (int i = 0; i < N; i++) { samples[perm[i]] = 1; } forest[j][m] = new RegressionTree(attributes, x, response[j], J, order, samples, output[j]); for (int i = 0; i < n; i++) { h[j][i] += shrinkage * forest[j][m].predict(x[i]); } } } }
/**
 * Constructor. Clustering data into k clusters. The maximum number of random
 * search is set to 0.0125 * k * (n - k), i.e. 1.25% of k * (n - k), rounded to
 * the nearest integer, where n is the number of data and k is the number of
 * clusters. That value is passed as the fourth argument to the four-argument
 * constructor.
 *
 * <p>NOTE(review): the number of local searches is decided by the delegated
 * constructor, which is not visible here; it was previously documented as
 * max(8, numProcessors) - confirm.
 *
 * @param data the dataset for clustering.
 * @param distance the distance/dissimilarity measure.
 * @param k the number of clusters.
 */
public CLARANS(T[] data, Distance<T> distance, int k) { this(data, distance, k, (int) Math.round(0.0125 * k * (data.length - k))); }