private void _cluster(double[][] data, int k) { long clock = System.currentTimeMillis(); SpectralClustering cluster = new SpectralClustering(data, k, 0.355); System.out.format( "DBSCAN clusterings %d samples in %dms\n", data.length, System.currentTimeMillis() - clock); System.out.println("getNumClusters:" + cluster.getNumClusters()); System.out.println("getClusterSize:" + cluster.getClusterSize()); // System.out.println(JSON.toJSONString(dbscan.getClusterSize())); System.out.println("toString:" + cluster.toString()); /** ************************************************************ */ boolean more = true; EigenValueDecomposition eigen = cluster.getEigen(); double[] lab = eigen.getEigenValues(); double sd = smile.math.Math.sd(eigen.getEigenValues()); System.out.println("sd(eigen.getEigenValues()):" + sd); if (Math.min(eigen.getEigenValues()) > 0.3) { result = cluster; cluster(data, k + 1); } else { return; } }
/** * Constructor. Learn Fisher's linear discriminant. * * @param x training instances. * @param y training labels in [0, k), where k is the number of classes. * @param L the dimensionality of mapped space. * @param tol a tolerance to decide if a covariance matrix is singular; it will reject variables * whose variance is less than tol<sup>2</sup>. */ public FLD(double[][] x, int[] y, int L, double tol) { if (x.length != y.length) { throw new IllegalArgumentException( String.format("The sizes of X and Y don't match: %d != %d", x.length, y.length)); } // class label set. int[] labels = Math.unique(y); Arrays.sort(labels); for (int i = 0; i < labels.length; i++) { if (labels[i] < 0) { throw new IllegalArgumentException("Negative class label: " + labels[i]); } if (i > 0 && labels[i] - labels[i - 1] > 1) { throw new IllegalArgumentException("Missing class: " + labels[i] + 1); } } k = labels.length; if (k < 2) { throw new IllegalArgumentException("Only one class."); } if (tol < 0.0) { throw new IllegalArgumentException("Invalid tol: " + tol); } if (x.length <= k) { throw new IllegalArgumentException( String.format("Sample size is too small: %d <= %d", x.length, k)); } if (L >= k) { throw new IllegalArgumentException( String.format("The dimensionality of mapped space is too high: %d >= %d", L, k)); } if (L <= 0) { L = k - 1; } final int n = x.length; p = x[0].length; // The number of instances in each class. int[] ni = new int[k]; // Common mean vector. mean = Math.colMean(x); // Common covariance. double[][] T = new double[p][p]; // Class mean vectors. mu = new double[k][p]; for (int i = 0; i < n; i++) { int c = y[i]; ni[c]++; for (int j = 0; j < p; j++) { mu[c][j] += x[i][j]; } } for (int i = 0; i < k; i++) { for (int j = 0; j < p; j++) { mu[i][j] = mu[i][j] / ni[i] - mean[j]; } } for (int i = 0; i < n; i++) { for (int j = 0; j < p; j++) { for (int l = 0; l <= j; l++) { T[j][l] += (x[i][j] - mean[j]) * (x[i][l] - mean[l]); } } } for (int j = 0; j < p; j++) { for (int l = 0; l <= j; l++) { T[j][l] /= n; T[l][j] = T[j][l]; } } // Between class scatter double[][] B = new double[p][p]; for (int i = 0; i < k; i++) { for (int j = 0; j < p; j++) { for (int l = 0; l <= j; l++) { B[j][l] += mu[i][j] * mu[i][l]; } } } for (int j = 0; j < p; j++) { for (int l = 0; l <= j; l++) { B[j][l] /= k; B[l][j] = B[j][l]; } } EigenValueDecomposition eigen = EigenValueDecomposition.decompose(T, true); tol = tol * tol; double[] s = eigen.getEigenValues(); for (int i = 0; i < s.length; i++) { if (s[i] < tol) { throw new IllegalArgumentException("The covariance matrix is close to singular."); } s[i] = 1.0 / s[i]; } double[][] U = eigen.getEigenVectors(); double[][] UB = Math.atbmm(U, B); for (int i = 0; i < k; i++) { for (int j = 0; j < p; j++) { UB[i][j] *= s[j]; } } Math.abmm(U, UB, B); eigen = EigenValueDecomposition.decompose(B, true); U = eigen.getEigenVectors(); scaling = new double[p][L]; for (int i = 0; i < p; i++) { System.arraycopy(U[i], 0, scaling[i], 0, L); } smean = new double[L]; Math.atx(scaling, mean, smean); smu = Math.abmm(mu, scaling); }