@Test public void testClustering() { UpdatableSearcher searcher = new BruteSearch(new EuclideanDistanceMeasure()); BallKMeans clusterer = new BallKMeans(searcher, 1 << NUM_DIMENSIONS, NUM_ITERATIONS); long startTime = System.currentTimeMillis(); clusterer.cluster(syntheticData.getFirst()); long endTime = System.currentTimeMillis(); assertEquals( "Total weight not preserved", totalWeight(syntheticData.getFirst()), totalWeight(clusterer), 1e-9); // Verify that each corner of the cube has a centroid very nearby. // This is probably FALSE for large-dimensional spaces! double maxWeight = 0; for (Vector mean : syntheticData.getSecond()) { WeightedThing<Vector> v = searcher.search(mean, 1).get(0); maxWeight = Math.max(v.getWeight(), maxWeight); } assertTrue("Maximum weight too large " + maxWeight, maxWeight < 0.05); double clusterTime = (endTime - startTime) / 1000.0; System.out.printf( "%s\n%.2f for clustering\n%.1f us per row\n\n", searcher.getClass().getName(), clusterTime, clusterTime / syntheticData.getFirst().size() * 1e6); // verify that the total weight of the centroids near each corner is correct double[] cornerWeights = new double[1 << NUM_DIMENSIONS]; Searcher trueFinder = new BruteSearch(new EuclideanDistanceMeasure()); for (Vector trueCluster : syntheticData.getSecond()) { trueFinder.add(trueCluster); } for (Centroid centroid : clusterer) { WeightedThing<Vector> closest = trueFinder.search(centroid, 1).get(0); cornerWeights[((Centroid) closest.getValue()).getIndex()] += centroid.getWeight(); } int expectedNumPoints = NUM_DATA_POINTS / (1 << NUM_DIMENSIONS); for (double v : cornerWeights) { System.out.printf("%f ", v); } System.out.println(); for (double v : cornerWeights) { assertEquals(expectedNumPoints, v, 0); } }