/**
 * Smoke test: runs {@link BallKMeans} over the data produced by {@code cubishTestData(1)}
 * and prints the first ten coordinates of every resulting centroid, one centroid per line.
 *
 * <p>The test makes no assertions; it only fails if clustering or printing throws.
 * NOTE(review): the constructor arguments 6 and 20 are presumably the number of clusters
 * and the maximum number of iterations -- confirm against the BallKMeans constructor.
 */
@Test
public void testBasicClustering() {
  List<? extends WeightedVector> points = cubishTestData(1);

  BruteSearch searcher = new BruteSearch(new EuclideanDistanceMeasure());
  BallKMeans clusterer = new BallKMeans(searcher, 6, 20);
  clusterer.cluster(points);

  for (Centroid center : clusterer) {
    // dump the leading ten coordinates of this centroid in fixed-width columns
    for (int dim = 0; dim < 10; dim++) {
      System.out.format("%10.4f", center.get(dim));
    }
    System.out.format("\n");
  }
}
@Test public void testClustering() { UpdatableSearcher searcher = new BruteSearch(new EuclideanDistanceMeasure()); BallKMeans clusterer = new BallKMeans(searcher, 1 << NUM_DIMENSIONS, NUM_ITERATIONS); long startTime = System.currentTimeMillis(); clusterer.cluster(syntheticData.getFirst()); long endTime = System.currentTimeMillis(); assertEquals( "Total weight not preserved", totalWeight(syntheticData.getFirst()), totalWeight(clusterer), 1e-9); // Verify that each corner of the cube has a centroid very nearby. // This is probably FALSE for large-dimensional spaces! double maxWeight = 0; for (Vector mean : syntheticData.getSecond()) { WeightedThing<Vector> v = searcher.search(mean, 1).get(0); maxWeight = Math.max(v.getWeight(), maxWeight); } assertTrue("Maximum weight too large " + maxWeight, maxWeight < 0.05); double clusterTime = (endTime - startTime) / 1000.0; System.out.printf( "%s\n%.2f for clustering\n%.1f us per row\n\n", searcher.getClass().getName(), clusterTime, clusterTime / syntheticData.getFirst().size() * 1e6); // verify that the total weight of the centroids near each corner is correct double[] cornerWeights = new double[1 << NUM_DIMENSIONS]; Searcher trueFinder = new BruteSearch(new EuclideanDistanceMeasure()); for (Vector trueCluster : syntheticData.getSecond()) { trueFinder.add(trueCluster); } for (Centroid centroid : clusterer) { WeightedThing<Vector> closest = trueFinder.search(centroid, 1).get(0); cornerWeights[((Centroid) closest.getValue()).getIndex()] += centroid.getWeight(); } int expectedNumPoints = NUM_DATA_POINTS / (1 << NUM_DIMENSIONS); for (double v : cornerWeights) { System.out.printf("%f ", v); } System.out.println(); for (double v : cornerWeights) { assertEquals(expectedNumPoints, v, 0); } }
@Test public void testInitialization() { // start with super clusterable data List<? extends WeightedVector> data = cubishTestData(0.01); // just do initialization of ball k-means. This should drop a point into each of the clusters BallKMeans r = new BallKMeans(new BruteSearch(new EuclideanDistanceMeasure()), 6, 20); r.cluster(data); // put the centroids into a matrix Matrix x = new DenseMatrix(6, 5); int row = 0; for (Centroid c : r) { x.viewRow(row).assign(c.viewPart(0, 5)); row++; } // verify that each column looks right. Should contain zeros except for a single 6. final Vector columnNorms = x.aggregateColumns( new VectorFunction() { @Override public double apply(Vector f) { // return the sum of three discrepancy measures return Math.abs(f.minValue()) + Math.abs(f.maxValue() - 6) + Math.abs(f.norm(1) - 6); } }); // verify all errors are nearly zero assertEquals(0, columnNorms.norm(1) / columnNorms.size(), 0.1); // verify that the centroids are a permutation of the original ones SingularValueDecomposition svd = new SingularValueDecomposition(x); Vector s = svd.getS().viewDiagonal().assign(Functions.div(6)); assertEquals(5, s.getLengthSquared(), 0.05); assertEquals(5, s.norm(1), 0.05); }