Пример #1
0
  @Test
  public void testClustering() {
    UpdatableSearcher searcher = new BruteSearch(new EuclideanDistanceMeasure());
    BallKMeans clusterer = new BallKMeans(searcher, 1 << NUM_DIMENSIONS, NUM_ITERATIONS);

    long startTime = System.currentTimeMillis();
    clusterer.cluster(syntheticData.getFirst());
    long endTime = System.currentTimeMillis();

    assertEquals(
        "Total weight not preserved",
        totalWeight(syntheticData.getFirst()),
        totalWeight(clusterer),
        1e-9);

    // Verify that each corner of the cube has a centroid very nearby.
    // This is probably FALSE for large-dimensional spaces!
    double maxWeight = 0;
    for (Vector mean : syntheticData.getSecond()) {
      WeightedThing<Vector> v = searcher.search(mean, 1).get(0);
      maxWeight = Math.max(v.getWeight(), maxWeight);
    }
    assertTrue("Maximum weight too large " + maxWeight, maxWeight < 0.05);
    double clusterTime = (endTime - startTime) / 1000.0;
    System.out.printf(
        "%s\n%.2f for clustering\n%.1f us per row\n\n",
        searcher.getClass().getName(),
        clusterTime,
        clusterTime / syntheticData.getFirst().size() * 1e6);

    // verify that the total weight of the centroids near each corner is correct
    double[] cornerWeights = new double[1 << NUM_DIMENSIONS];
    Searcher trueFinder = new BruteSearch(new EuclideanDistanceMeasure());
    for (Vector trueCluster : syntheticData.getSecond()) {
      trueFinder.add(trueCluster);
    }
    for (Centroid centroid : clusterer) {
      WeightedThing<Vector> closest = trueFinder.search(centroid, 1).get(0);
      cornerWeights[((Centroid) closest.getValue()).getIndex()] += centroid.getWeight();
    }
    int expectedNumPoints = NUM_DATA_POINTS / (1 << NUM_DIMENSIONS);
    for (double v : cornerWeights) {
      System.out.printf("%f ", v);
    }
    System.out.println();
    for (double v : cornerWeights) {
      assertEquals(expectedNumPoints, v, 0);
    }
  }