Exemplo n.º 1
0
  /**
   * Asking for three clusters when only two points are supplied must be rejected with a
   * {@link NumberIsTooSmallException}. See issue MATH-436.
   */
  @Test(expected = NumberIsTooSmallException.class)
  public void testPerformClusterAnalysisToManyClusters() {
    // Only two data points are available ...
    List<DoublePoint> twoPoints =
        Arrays.asList(
            new DoublePoint(new int[] {1959, 325100}), new DoublePoint(new int[] {1960, 373200}));

    // ... while three clusters (and a single iteration) are requested.
    KMeansPlusPlusClusterer<DoublePoint> clusterer =
        new KMeansPlusPlusClusterer<DoublePoint>(3, 1, new EuclideanDistance(), random);

    clusterer.cluster(twoPoints);
  }
Exemplo n.º 2
0
  /**
   * Clusters a fixed set of {@code numberOfVariables} four-coordinate points into every cluster
   * count from 2 up to (but excluding) the number of points, once for each listed empty-cluster
   * strategy.
   *
   * <p>For every run the test checks that exactly the requested number of clusters is returned
   * and that the cluster sizes add up to the number of input points.
   */
  @Test
  public void testCertainSpace() {
    // Listed explicitly rather than via values() so that only these strategies are exercised.
    KMeansPlusPlusClusterer.EmptyClusterStrategy[] strategies = {
      KMeansPlusPlusClusterer.EmptyClusterStrategy.LARGEST_VARIANCE,
      KMeansPlusPlusClusterer.EmptyClusterStrategy.LARGEST_POINTS_NUMBER,
      KMeansPlusPlusClusterer.EmptyClusterStrategy.FARTHEST_POINT
    };
    for (KMeansPlusPlusClusterer.EmptyClusterStrategy strategy : strategies) {
      final int numberOfVariables = 27;
      final int coordinatesPerPoint = 4;
      // test values will be multiplied by this factor
      final int multiplier = 1000000;

      // define the space which will break the cluster algorithm
      DoublePoint[] breakingPoints = new DoublePoint[numberOfVariables];
      for (int i = 0; i < numberOfVariables; i++) {
        int[] coordinates = new int[coordinatesPerPoint];
        for (int j = 0; j < coordinatesPerPoint; j++) {
          // Coordinate j of point i starts at 1 and advances by numberOfVariables per step of
          // either index; the largest value (784 * multiplier) still fits in an int.
          coordinates[j] = (1 + (i + j) * numberOfVariables) * multiplier;
        }
        breakingPoints[i] = new DoublePoint(coordinates);
      }

      // Try every cluster count from 2 up to one fewer than the number of points.
      for (int n = 2; n < numberOfVariables; ++n) {
        KMeansPlusPlusClusterer<DoublePoint> transformer =
            new KMeansPlusPlusClusterer<DoublePoint>(
                n, 100, new EuclideanDistance(), random, strategy);

        List<? extends Cluster<DoublePoint>> clusters =
            transformer.cluster(Arrays.asList(breakingPoints));

        Assert.assertEquals(n, clusters.size());

        // Cluster sizes must add up to the number of input points.
        int sum = 0;
        for (Cluster<DoublePoint> cluster : clusters) {
          sum += cluster.getPoints().size();
        }
        Assert.assertEquals(numberOfVariables, sum);
      }
    }
  }
Exemplo n.º 3
0
  /**
   * Degenerate case from JIRA issue MATH-305: two points, one cluster, one iteration.
   *
   * <p>The single resulting cluster is expected to contain both input points.
   */
  @Test
  public void testPerformClusterAnalysisDegenerate() {
    List<DoublePoint> input =
        Arrays.asList(
            new DoublePoint(new int[] {1959, 325100}), new DoublePoint(new int[] {1960, 373200}));

    KMeansPlusPlusClusterer<DoublePoint> clusterer =
        new KMeansPlusPlusClusterer<DoublePoint>(1, 1);
    List<? extends Cluster<DoublePoint>> result = clusterer.cluster(input);

    Assert.assertEquals(1, result.size());

    Cluster<DoublePoint> onlyCluster = result.get(0);
    Assert.assertEquals(2, onlyCluster.getPoints().size());

    // Look the members up through freshly built, equal-valued points rather than the input
    // instances themselves.
    DoublePoint expectedFirst = new DoublePoint(new int[] {1959, 325100});
    DoublePoint expectedSecond = new DoublePoint(new int[] {1960, 373200});
    Assert.assertTrue(onlyCluster.getPoints().contains(expectedFirst));
    Assert.assertTrue(onlyCluster.getPoints().contains(expectedSecond));
  }
Exemplo n.º 4
0
  /**
   * Exercises clustering of points that lie very close together. See issue MATH-546.
   *
   * <p>The data set is one point repeated many times plus a single distinct point; the distinct
   * point is expected to be chosen as one of the two cluster centers.
   */
  @Test
  public void testSmallDistances() {
    final int duplicateCount = 10 * 1000;
    final long seed = 0L;
    final int clusterCount = 2;
    // Zero iterations: the clusterer is only asked to choose its initial cluster centers.
    final int iterationCount = 0;

    // Almost every entry is the same point; exactly one differs from it.
    final DoublePoint duplicated = new DoublePoint(new int[] {0});
    final DoublePoint outlier = new DoublePoint(new int[] {1});

    Collection<DoublePoint> data = new ArrayList<DoublePoint>();
    int added = 0;
    while (added < duplicateCount) {
      data.add(duplicated);
      added++;
    }
    data.add(outlier);

    random.setSeed(seed);
    KMeansPlusPlusClusterer<DoublePoint> clusterer =
        new KMeansPlusPlusClusterer<DoublePoint>(
            clusterCount, iterationCount, new CloseDistance(), random);
    List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(data);

    // The distinct point must be among the chosen centers.
    boolean outlierChosen = false;
    for (CentroidCluster<DoublePoint> candidate : clusters) {
      outlierChosen |= candidate.getCenter().equals(outlier);
    }
    Assert.assertTrue(outlierChosen);
  }