示例#1
0
  /**
   * Checks {@code Centroid.getCentroid} for array-of-rows input: the {@code badNd} fixture must be
   * rejected with an {@link IllegalArgumentException}, and the centroids of the {@code goodNd}
   * fixture and of inline 3-D, 2-D and 1-D data sets must match the expected values.
   */
  @Test
  public void testGetCentroidDoubleArrayArray() {
    // Absolute tolerance shared by every array comparison in this test.
    final double tolerance = 1E-9;

    // The badNd fixture has to be refused; remember whether the exception was seen.
    boolean rejected = false;
    try {
      Centroid.getCentroid(badNd);
    } catch (IllegalArgumentException expected) {
      rejected = true;
    }
    if (!rejected) {
      fail("Should throw IllegalArgumentException");
    }

    // Expected 20-D centroid: the values 1 through 20.
    double[] expected20d = new double[20];
    for (int k = 0; k < expected20d.length; k++) {
      expected20d[k] = k + 1;
    }
    assertArrayEquals(expected20d, Centroid.getCentroid(goodNd), tolerance);

    // Six points in three dimensions and their centroid.
    double[][] points3d = {
      {1, 2, 3}, {4, 5, 6}, {7, 8, 9}, {10, 11, 12}, {13, 14, 15}, {16, 17, 18}
    };
    double[] expected3d = {8.5, 9.5, 10.5};
    assertArrayEquals(expected3d, Centroid.getCentroid(points3d), tolerance);

    // Six points in two dimensions and their centroid.
    double[][] points2d = {{1, 2}, {4, 5}, {7, 8}, {10, 11}, {13, 14}, {16, 17}};
    double[] expected2d = {8.5, 9.5};
    assertArrayEquals(expected2d, Centroid.getCentroid(points2d), tolerance);

    // Six points in one dimension and their centroid.
    double[][] points1d = {{1}, {4}, {7}, {10}, {13}, {16}};
    double[] expected1d = {8.5};
    assertArrayEquals(expected1d, Centroid.getCentroid(points1d), tolerance);
  }
示例#2
0
  /**
   * Computes the Euclidean distance between a point and a centroid: the square root of the sum of
   * squared per-dimension differences.
   *
   * @param point the feature vector to measure from
   * @param centroid the centroid to measure to
   * @return the Euclidean distance between {@code point} and {@code centroid}
   * @throws IllegalArgumentException if the two arguments report a different number of dimensions
   */
  private double euclideanDistance(Features point, Centroid centroid) {
    // Query the dimension count once instead of on every loop iteration.
    final int dims = point.getNumberOfDim();
    if (dims != centroid.getNumberOfDim()) {
      throw new IllegalArgumentException(
          "Dimension mismatch: point has "
              + dims
              + " dimensions but centroid has "
              + centroid.getNumberOfDim());
    }

    double sumOfSquares = 0;
    for (int i = 0; i < dims; i++) {
      // Compute the difference once and square it, rather than evaluating it twice.
      final double diff = point.getDim(i) - centroid.getDim(i);
      sumOfSquares += diff * diff;
    }

    return Math.sqrt(sumOfSquares);
  }
示例#3
0
 /**
  * Checks the single-array overload of {@code Centroid.getCentroid} against the expected centroid
  * value for each of the {@code oneDa} .. {@code oneDj} fixtures.
  */
 @Test
 public void testGetCentroidDoubleArray() {
   // Absolute tolerance shared by every comparison below.
   final double tolerance = 1E-9;

   assertEquals(10.5, Centroid.getCentroid(oneDa), tolerance);
   assertEquals(10.0, Centroid.getCentroid(oneDb), tolerance);
   assertEquals(9.0, Centroid.getCentroid(oneDc), tolerance);
   assertEquals(8.5, Centroid.getCentroid(oneDd), tolerance);
   assertEquals(8.0, Centroid.getCentroid(oneDe), tolerance);
   assertEquals(7.5, Centroid.getCentroid(oneDf), tolerance);
   assertEquals(7.0, Centroid.getCentroid(oneDg), tolerance);
   assertEquals(6.5, Centroid.getCentroid(oneDh), tolerance);
   assertEquals(6.0, Centroid.getCentroid(oneDi), tolerance);
   assertEquals(5.5, Centroid.getCentroid(oneDj), tolerance);
 }
示例#4
0
  /**
   * Smoke test: clusters the data produced by {@code cubishTestData(1)} with a {@code BallKMeans}
   * built on a brute-force Euclidean searcher, then prints the first ten coordinates of every
   * resulting centroid, one centroid per line. Makes no assertions.
   */
  @Test
  public void testBasicClustering() {
    List<? extends WeightedVector> points = cubishTestData(1);

    BallKMeans clusterer = new BallKMeans(new BruteSearch(new EuclideanDistanceMeasure()), 6, 20);
    clusterer.cluster(points);

    for (Centroid c : clusterer) {
      // One fixed-width column per coordinate.
      for (int dim = 0; dim < 10; dim++) {
        System.out.printf("%10.4f", c.get(dim));
      }
      System.out.printf("\n");
    }
  }
示例#5
0
  /**
   * Clusters the synthetic data set with {@code BallKMeans} and checks three things: the total
   * weight is preserved, every true cluster mean has a centroid close by, and the centroid weight
   * attributed to each true cluster equals the expected number of points per cluster. Also prints
   * timing information and the per-corner weights.
   */
  @Test
  public void testClustering() {
    // One cluster per corner of the cube.
    final int numCorners = 1 << NUM_DIMENSIONS;

    UpdatableSearcher centroidSearcher = new BruteSearch(new EuclideanDistanceMeasure());
    BallKMeans kMeans = new BallKMeans(centroidSearcher, numCorners, NUM_ITERATIONS);

    long begin = System.currentTimeMillis();
    kMeans.cluster(syntheticData.getFirst());
    long finish = System.currentTimeMillis();

    assertEquals(
        "Total weight not preserved",
        totalWeight(syntheticData.getFirst()),
        totalWeight(kMeans),
        1e-9);

    // Each corner of the cube should have a centroid very close to it.
    // This probably does NOT hold for high-dimensional spaces!
    double largestWeight = 0;
    for (Vector corner : syntheticData.getSecond()) {
      WeightedThing<Vector> nearest = centroidSearcher.search(corner, 1).get(0);
      largestWeight = Math.max(nearest.getWeight(), largestWeight);
    }
    assertTrue("Maximum weight too large " + largestWeight, largestWeight < 0.05);

    double elapsedSeconds = (finish - begin) / 1000.0;
    System.out.printf(
        "%s\n%.2f for clustering\n%.1f us per row\n\n",
        centroidSearcher.getClass().getName(),
        elapsedSeconds,
        elapsedSeconds / syntheticData.getFirst().size() * 1e6);

    // Accumulate, per true cluster, the weight of the centroids that are closest to it.
    double[] weightPerCorner = new double[numCorners];
    Searcher cornerLookup = new BruteSearch(new EuclideanDistanceMeasure());
    for (Vector corner : syntheticData.getSecond()) {
      cornerLookup.add(corner);
    }
    for (Centroid found : kMeans) {
      WeightedThing<Vector> nearestCorner = cornerLookup.search(found, 1).get(0);
      int cornerIndex = ((Centroid) nearestCorner.getValue()).getIndex();
      weightPerCorner[cornerIndex] += found.getWeight();
    }

    int pointsPerCorner = NUM_DATA_POINTS / numCorners;

    // Print every weight before asserting so the full picture is visible on failure.
    for (double weight : weightPerCorner) {
      System.out.printf("%f ", weight);
    }
    System.out.println();
    for (double weight : weightPerCorner) {
      assertEquals(pointsPerCorner, weight, 0);
    }
  }
示例#6
0
  /**
   * Runs {@code BallKMeans} on highly separable data and checks the resulting centroids: each of
   * the first five columns of the centroid matrix should be zeros apart from a single 6, and the
   * scaled singular values of that matrix should be consistent with the centroids being a
   * permutation of the original ones.
   */
  @Test
  public void testInitialization() {
    // Begin with data that clusters extremely well.
    List<? extends WeightedVector> points = cubishTestData(0.01);

    // Only the initialization of ball k-means matters here; it should place one point in each
    // of the clusters.
    BallKMeans clusterer = new BallKMeans(new BruteSearch(new EuclideanDistanceMeasure()), 6, 20);
    clusterer.cluster(points);

    // Copy the first five coordinates of every centroid into the rows of a matrix.
    Matrix centroids = new DenseMatrix(6, 5);
    int nextRow = 0;
    for (Centroid c : clusterer) {
      centroids.viewRow(nextRow++).assign(c.viewPart(0, 5));
    }

    // Per-column error: a good column holds zeros except for a single 6.
    VectorFunction columnDiscrepancy =
        new VectorFunction() {
          @Override
          public double apply(Vector column) {
            // Sum of three discrepancy measures.
            double minError = Math.abs(column.minValue());
            double maxError = Math.abs(column.maxValue() - 6);
            double sumError = Math.abs(column.norm(1) - 6);
            return minError + maxError + sumError;
          }
        };
    Vector columnErrors = centroids.aggregateColumns(columnDiscrepancy);

    // The mean error over all columns must be close to zero.
    assertEquals(0, columnErrors.norm(1) / columnErrors.size(), 0.1);

    // The centroids should be a permutation of the original ones.
    SingularValueDecomposition decomposition = new SingularValueDecomposition(centroids);
    Vector scaledSingularValues =
        decomposition.getS().viewDiagonal().assign(Functions.div(6));
    assertEquals(5, scaledSingularValues.getLengthSquared(), 0.05);
    assertEquals(5, scaledSingularValues.norm(1), 0.05);
  }
示例#7
0
 /**
  * Emits the string form of the centroid key and of the integer value as a pair of {@code Text}
  * objects.
  *
  * @param key the centroid whose {@code toString()} becomes the output key
  * @param value the value whose {@code toString()} becomes the output value
  * @param context the context the pair is written to
  * @throws IOException propagated from {@code context.write}
  * @throws InterruptedException propagated from {@code context.write}
  */
 @Override
 protected void map(Centroid key, IntWritable value, Context context)
     throws IOException, InterruptedException {
   Text outputKey = new Text(key.toString());
   Text outputValue = new Text(value.toString());
   context.write(outputKey, outputValue);
 }
示例#8
0
 /**
  * Emits a single record whose key is the centroid and the point rendered as strings and joined
  * by one space, paired with an empty {@code Text} value.
  *
  * @param key the centroid; its {@code toString()} forms the first part of the output key
  * @param value the point; its {@code toString()} forms the second part of the output key
  * @param context the context the record is written to
  * @throws IOException propagated from {@code context.write}
  * @throws InterruptedException propagated from {@code context.write}
  */
 @Override
 protected void map(Centroid key, Point value, Context context)
     throws IOException, InterruptedException {
   String centroidText = key.toString();
   String pointText = value.toString();
   Text outputKey = new Text(centroidText + " " + pointText);
   context.write(outputKey, new Text(""));
 }