Exemple #1
0
  /**
   * Picks {@code N} initial cluster centers out of {@code points} using the strategy named by
   * {@code init}.
   *
   * <p>The first center is always {@code points[0]}. The remaining slots are filled as follows:
   *
   * <ul>
   *   <li>{@code Random}: nothing more is chosen here; slots {@code 1..N-1} are left {@code null}.
   *   <li>{@code PlusPlus}: k-means++-style seeding. Each round computes every point's
   *       {@code minSqr} value against the centers chosen so far, then scans the points in order
   *       and takes the first one whose value is at least {@code rand.nextDouble() * sum}. If a
   *       scan accepts nobody, the round is repeated.
   *   <li>{@code Furthest}: each round takes the point with the largest {@code minSqr} value
   *       against the centers chosen so far (ties and all-zero rounds resolve to the lowest
   *       index).
   * </ul>
   *
   * <p>Returned centers are references to rows of {@code points}, not copies.
   *
   * @param points candidate rows; must contain at least one row, since {@code points[0]} is read
   *     unconditionally
   * @param rand source of randomness; only consumed by the {@code PlusPlus} strategy
   * @param N number of centers to return; must be at least 1, since slot 0 is always written
   * @param init seeding strategy; any value other than the three above ends in
   *     {@code throw H2O.fail()}
   * @param isCats passed through unchanged to {@code minSqr}
   * @return an array of length {@code N} holding the chosen centers
   */
  private static double[][] recluster(
      double[][] points, Random rand, int N, Initialization init, String[][] isCats) {
    double[][] res = new double[N][];
    res[0] = points[0];
    int count = 1; // number of slots of res filled so far
    ClusterDist cd = new ClusterDist(); // scratch object handed to every minSqr call
    switch (init) {
      case Random:
        break;
      case PlusPlus:
        { // k-means++
          // Per-round cache of each point's minSqr value, so it is computed once per point
          // instead of once for the sum and again for the acceptance test.
          // Assumes minSqr returns the same value for the same arguments -- nothing it is given
          // changes between the two passes of a round.
          double[] dists = new double[points.length];
          while (count < res.length) {
            double sum = 0;
            for (int i = 0; i < points.length; i++) {
              dists[i] = minSqr(res, points[i], isCats, cd, count);
              sum += dists[i];
            }

            // NOTE(review): a fresh random number is drawn per point and the first hit wins,
            // which favours earlier rows compared with textbook D^2 sampling. Kept as-is so
            // that seeded runs keep producing the same centers.
            for (int i = 0; i < points.length; i++) {
              if (dists[i] >= rand.nextDouble() * sum) {
                res[count++] = points[i];
                break;
              }
            }
          }
          break;
        }
      case Furthest:
        { // Takes cluster center further from any already chosen ones
          while (count < res.length) {
            double max = 0;
            int index = 0;
            for (int i = 0; i < points.length; i++) {
              double sqr = minSqr(res, points[i], isCats, cd, count);
              if (sqr > max) { // strict '>' keeps the lowest index on ties
                max = sqr;
                index = i;
              }
            }
            res[count++] = points[index];
          }
          break;
        }
      default:
        throw H2O.fail();
    }
    return res;
  }