/**
 * Assigns an instance to one of the clusters found during training, using a plain
 * nearest-neighbour rule over the stored training points.
 *
 * @param instance the instance to classify
 * @return the cluster label of the training point closest to the instance, or -1 when no
 *     training point is strictly closer than positive infinity
 */
public int clusterInstance(Instance instance) throws java.lang.Exception {
  final DoubleMatrix1D query = DoubleFactory1D.dense.make(instance.toDoubleArray());

  int label = -1;
  double smallest = Double.POSITIVE_INFINITY;

  int row = 0;
  while (row < v.rows()) {
    final double candidate = distnorm2(query, v.viewRow(row));
    // Strict "less than" keeps the first of several equally distant points.
    if (candidate < smallest) {
      smallest = candidate;
      label = cluster[row];
    }
    row++;
  }
  return label;
}
/** * Splits recursively the points of the graph while the value of the best cut found is less of a * specified limit (the alpha star factor). * * @param W the weight matrix of the graph * @param alpha_star the alpha star factor * @return an array of sets of points (partitions) */ protected int[][] partition(DoubleMatrix2D W, double alpha_star) { numPartitions++; // System.out.println("!"); // If the graph contains only one point if (W.columns() == 1) { int[][] p = new int[1][1]; p[0][0] = 0; return p; // Otherwise } else { // Computes the best cut int[][] cut = bestCut(W); // Computes the value of the found cut double cutVal = Ncut(W, cut[0], cut[1], null); // System.out.println("cutVal = "+cutVal +"\tnumPartitions = "+numPartitions); // If the value is less than alpha star if (cutVal < alpha_star && numPartitions < 2) { // Recursively partitions the first one found ... DoubleMatrix2D W0 = W.viewSelection(cut[0], cut[0]); int[][] p0 = partition(W0, alpha_star); // ... and the second one DoubleMatrix2D W1 = W.viewSelection(cut[1], cut[1]); int[][] p1 = partition(W1, alpha_star); // Merges the partitions found in the previous recursive steps int[][] p = new int[p0.length + p1.length][]; for (int i = 0; i < p0.length; i++) { p[i] = new int[p0[i].length]; for (int j = 0; j < p0[i].length; j++) p[i][j] = cut[0][p0[i][j]]; } for (int i = 0; i < p1.length; i++) { p[i + p0.length] = new int[p1[i].length]; for (int j = 0; j < p1[i].length; j++) p[i + p0.length][j] = cut[1][p1[i][j]]; } return p; } else { // Otherwise returns the partitions found in current step // w/o recursive invocation int[][] p = new int[1][W.columns()]; for (int i = 0; i < p[0].length; i++) p[0][i] = i; return p; } } }
/** * Generates a clusterer by the mean of spectral clustering algorithm. * * @param data set of instances serving as training data * @exception Exception if the clusterer has not been generated successfully */ public void buildClusterer(Instances data) throws java.lang.Exception { m_Sequences = new Instances(data); int n = data.numInstances(); int k = data.numAttributes(); DoubleMatrix2D w; if (useSparseMatrix) w = DoubleFactory2D.sparse.make(n, n); else w = DoubleFactory2D.dense.make(n, n); double[][] v1 = new double[n][]; for (int i = 0; i < n; i++) v1[i] = data.instance(i).toDoubleArray(); v = DoubleFactory2D.dense.make(v1); double sigma_sq = sigma * sigma; // Sets up similarity matrix for (int i = 0; i < n; i++) for (int j = i; j < n; j++) { /*double dist = distnorm2(v.viewRow(i), v.viewRow(j)); if((r == -1) || (dist < r)) { double sim = Math.exp(- (dist * dist) / (2 * sigma_sq)); w.set(i, j, sim); w.set(j, i, sim); }*/ /* String [] key = {data.instance(i).stringValue(0), data.instance(j).stringValue(0)}; System.out.println(key[0]); System.out.println(key[1]); System.out.println(simScoreMap.containsKey(key)); Double simValue = simScoreMap.get(key);*/ double sim = sim_matrix[i][j]; w.set(i, j, sim); w.set(j, i, sim); } // Partitions points int[][] p = partition(w, alpha_star); // Deploys results numOfClusters = p.length; cluster = new int[n]; for (int i = 0; i < p.length; i++) for (int j = 0; j < p[i].length; j++) cluster[p[i][j]] = i; // System.out.println("Final partition:"); // UtilsJS.printMatrix(p); // System.out.println("Cluster:\n"); // UtilsJS.printArray(cluster); this.numOfClusters = cluster[Utils.maxIndex(cluster)] + 1; // System.out.println("Num clusters:\t"+this.numOfClusters); }
/** * Returns the best cut of a graph w.r.t. the degree of dissimilarity between points of different * partitions and the degree of similarity between points of the same partition. * * @param W the weight matrix of the graph * @return an array of two elements, each of these contains the points of a partition */ protected static int[][] bestCut(DoubleMatrix2D W) { int n = W.columns(); // Builds the diagonal matrices D and D^(-1/2) (represented as their diagonals) DoubleMatrix1D d = DoubleFactory1D.dense.make(n); DoubleMatrix1D d_minus_1_2 = DoubleFactory1D.dense.make(n); for (int i = 0; i < n; i++) { double d_i = W.viewRow(i).zSum(); d.set(i, d_i); d_minus_1_2.set(i, 1 / Math.sqrt(d_i)); } DoubleMatrix2D D = DoubleFactory2D.sparse.diagonal(d); // System.out.println("DoubleMatrix2D :\n"+D.toString()); DoubleMatrix2D X = D.copy(); // System.out.println("DoubleMatrix2D copy :\n"+X.toString()); // X = D^(-1/2) * (D - W) * D^(-1/2) X.assign(W, Functions.minus); // System.out.println("DoubleMatrix2D X: (D-W) :\n"+X.toString()); for (int i = 0; i < n; i++) for (int j = 0; j < n; j++) X.set(i, j, X.get(i, j) * d_minus_1_2.get(i) * d_minus_1_2.get(j)); // Computes the eigenvalues and the eigenvectors of X EigenvalueDecomposition e = new EigenvalueDecomposition(X); DoubleMatrix1D lambda = e.getRealEigenvalues(); // Selects the eigenvector z_2 associated with the second smallest eigenvalue // Creates a map that contains the pairs <index, eigenvalue> AbstractIntDoubleMap map = new OpenIntDoubleHashMap(n); for (int i = 0; i < n; i++) map.put(i, Math.abs(lambda.get(i))); IntArrayList list = new IntArrayList(); // Sorts the map on the value map.keysSortedByValue(list); // Gets the index of the second smallest element int i_2 = list.get(1); // y_2 = D^(-1/2) * z_2 DoubleMatrix1D y_2 = e.getV().viewColumn(i_2).copy(); y_2.assign(d_minus_1_2, Functions.mult); // Creates a map that contains the pairs <i, y_2[i]> map.clear(); for (int i = 0; i < n; i++) map.put(i, y_2.get(i)); // Sorts 
the map on the value map.keysSortedByValue(list); // Search the element in the map previuosly ordered that minimizes the cut // of the partition double best_cut = Double.POSITIVE_INFINITY; int[][] partition = new int[2][]; // The array v contains all the elements of the graph ordered by their // projection on vector y_2 int[] v = list.elements(); // For each admissible splitting point i for (int i = 1; i < n; i++) { // The array a contains all the elements that have a projection on vector // y_2 less or equal to the one of i-th element // The array b contains the remaining elements int[] a = new int[i]; int[] b = new int[n - i]; System.arraycopy(v, 0, a, 0, i); System.arraycopy(v, i, b, 0, n - i); double cut = Ncut(W, a, b, v); if (cut < best_cut) { best_cut = cut; partition[0] = a; partition[1] = b; } } // System.out.println("Partition:"); // UtilsJS.printMatrix(partition); return partition; }
/**
 * Computes the association degree between two partitions of a graph, i.e. the sum of the
 * weights of all the edges that join a point of the first partition with a point of the second
 * one.
 *
 * @param W the weight matrix of the graph
 * @param a the points of the first partition (row indices into {@code W})
 * @param b the points of the second partition (column indices into {@code W})
 * @return the association degree
 */
protected static double asso(DoubleMatrix2D W, int[] a, int[] b) {
  final DoubleMatrix2D crossWeights = W.viewSelection(a, b);
  return crossWeights.zSum();
}