/**
 * Builds a sparse matrix from a fixed-seed random generator, so identical arguments always
 * produce the same matrix. Some rows may end up as copies of other rows.
 *
 * @param numRows number of rows in the resulting matrix
 * @param nonNullRows number of row-generation rounds; when equal to {@code numRows}, round
 *     {@code i} always assigns row {@code i}
 * @param numCols number of columns in the resulting matrix
 * @param entriesPerRow number of random (column, value) writes per generated row; columns are
 *     drawn independently, so a row may hold fewer distinct entries
 * @param entryMean factor by which each standard Gaussian sample is multiplied
 * @return the generated matrix
 */
public static Matrix randomSequentialAccessSparseMatrix(
    int numRows, int nonNullRows, int numCols, int entriesPerRow, double entryMean) {
  SparseRowMatrix m = new SparseRowMatrix(new int[] {numRows, numCols});
  // Fixed seed: the sequence of draws below must stay in this order to keep output stable.
  Random r = new Random(1234L);
  boolean fillEveryRow = numRows == nonNullRows;
  for (int i = 0; i < nonNullRows; i++) {
    SequentialAccessSparseVector v = new SequentialAccessSparseVector(numCols);
    for (int j = 0; j < entriesPerRow; j++) {
      int col = r.nextInt(numCols);
      double val = r.nextGaussian();
      v.set(col, val * entryMean);
    }
    int c = r.nextInt(numRows);
    // nextBoolean() is always drawn, even when fillEveryRow would decide the branch alone.
    if (r.nextBoolean() || fillEveryRow) {
      m.assignRow(fillEveryRow ? i : c, v);
    } else {
      // Otherwise try to duplicate a randomly chosen, non-empty existing row into row c.
      Vector other = m.getRow(r.nextInt(numRows));
      if (other != null && other.getLengthSquared() > 0) {
        m.assignRow(c, other.clone());
      }
    }
  }
  return m;
}
/**
 * Asserts that the first {@code numEigensToCheck} rows of {@code eigens} are approximately
 * eigenvectors of {@code corpus}.
 *
 * <p>For each non-zero row {@code e}, the row is multiplied by the corpus ({@code times} when
 * {@code isSymmetric}, otherwise {@code timesSquared}) and the error is computed as
 * {@code 1 - (Ae . e) / sqrt(|Ae|^2 * |e|^2)}. Rows whose squared length is exactly zero are
 * skipped.
 *
 * @param eigens matrix whose rows are the candidate eigenvectors
 * @param corpus the operator the candidates are checked against
 * @param numEigensToCheck number of leading rows of {@code eigens} to check
 * @param errorMargin exclusive upper bound on the absolute error for each checked row
 * @param isSymmetric selects {@code corpus.times(e)} when true, {@code corpus.timesSquared(e)}
 *     when false
 */
public static void assertEigen(
    Matrix eigens,
    VectorIterable corpus,
    int numEigensToCheck,
    double errorMargin,
    boolean isSymmetric) {
  for (int i = 0; i < numEigensToCheck; i++) {
    Vector e = eigens.getRow(i);
    // Read once; used for both the zero check and the normalisation below.
    double eLengthSquared = e.getLengthSquared();
    if (eLengthSquared == 0) {
      continue;
    }
    Vector afterMultiply = isSymmetric ? corpus.times(e) : corpus.timesSquared(e);
    double dot = afterMultiply.dot(e);
    double afterNorm = afterMultiply.getLengthSquared();
    double error = 1 - dot / Math.sqrt(afterNorm * eLengthSquared);
    assertTrue(
        "Error margin: " + error + " too high! (for eigen " + i + ')',
        Math.abs(error) < errorMargin);
  }
}
@Test public void testInitialization() { // start with super clusterable data List<? extends WeightedVector> data = cubishTestData(0.01); // just do initialization of ball k-means. This should drop a point into each of the clusters BallKMeans r = new BallKMeans(new BruteSearch(new EuclideanDistanceMeasure()), 6, 20); r.cluster(data); // put the centroids into a matrix Matrix x = new DenseMatrix(6, 5); int row = 0; for (Centroid c : r) { x.viewRow(row).assign(c.viewPart(0, 5)); row++; } // verify that each column looks right. Should contain zeros except for a single 6. final Vector columnNorms = x.aggregateColumns( new VectorFunction() { @Override public double apply(Vector f) { // return the sum of three discrepancy measures return Math.abs(f.minValue()) + Math.abs(f.maxValue() - 6) + Math.abs(f.norm(1) - 6); } }); // verify all errors are nearly zero assertEquals(0, columnNorms.norm(1) / columnNorms.size(), 0.1); // verify that the centroids are a permutation of the original ones SingularValueDecomposition svd = new SingularValueDecomposition(x); Vector s = svd.getS().viewDiagonal().assign(Functions.div(6)); assertEquals(5, s.getLengthSquared(), 0.05); assertEquals(5, s.norm(1), 0.05); }
/**
 * Decides whether this cluster has converged, based on the distance between its freshly
 * computed centroid and its current center, and records the outcome.
 *
 * @param measure The distance measure to use for cluster-point comparisons.
 * @param convergenceDelta the largest distance (inclusive) still counted as converged.
 * @return true if the measured distance does not exceed {@code convergenceDelta}
 */
public boolean computeConvergence(DistanceMeasure measure, double convergenceDelta) {
  Vector newCentroid = computeCentroid();
  double centroidLengthSquared = newCentroid.getLengthSquared();
  double shift = measure.distance(centroidLengthSquared, newCentroid, getCenter());
  converged = shift <= convergenceDelta;
  return converged;
}