Beispiel #1
0
    /**
     * Stopping criteria for the iteration loop.
     *
     * <p>Iteration ends when the job is no longer running, when the model's iteration count has
     * reached {@code _parms._max_iterations}, or when the average per-cluster change between
     * {@code oldCenters} and {@code newCenters} drops below {@code TOLERANCE}.
     *
     * <p>Side effect: whenever the average change is computed, {@code _avg_centroids_chg} and
     * {@code _training_time_ms} on the model output are replaced by arrays one element longer,
     * built with {@code ArrayUtils.copyAndFillOf} from the average change and the current wall
     * clock time respectively (presumably appending the value to the history - confirm against
     * {@code ArrayUtils}).
     *
     * @param model model whose output holds the iteration count and the two history arrays
     * @param newCenters cluster centers produced by the current iteration
     * @param oldCenters cluster centers of the previous iteration, or {@code null} if there is none
     * @return {@code true} if iterating should stop, {@code false} to continue
     */
    boolean isDone(KMeansModel model, double[][] newCenters, double[][] oldCenters) {
      // Stopped/cancelled, or the iteration budget has been used up
      if (!isRunning() || model._output._iterations >= _parms._max_iterations) {
        return true;
      }
      // Nothing to compare against on the first pass, so keep iterating
      if (oldCenters == null) {
        return false;
      }

      // Accumulate the change of each standardized cluster center, then average per cluster
      double totalShift = 0;
      for (int c = 0; c < _parms._k; c++) {
        totalShift +=
            hex.genmodel.GenModel.KMeans_distance(
                oldCenters[c], newCenters[c], _isCats, null, null);
      }
      final double meanShift = totalShift / _parms._k;

      // Grow the centroid-change history by one slot holding this iteration's value
      model._output._avg_centroids_chg =
          ArrayUtils.copyAndFillOf(
              model._output._avg_centroids_chg,
              model._output._avg_centroids_chg.length + 1,
              meanShift);
      // Grow the timing history by one slot holding the current timestamp
      model._output._training_time_ms =
          ArrayUtils.copyAndFillOf(
              model._output._training_time_ms,
              model._output._training_time_ms.length + 1,
              System.currentTimeMillis());

      return meanShift < TOLERANCE;
    }
Beispiel #2
0
 /**
  * Adds to {@code _tss}, for every row of the given chunks, the distance between that row and
  * {@code _gc} as reported by {@code GenModel.KMeans_distance}.
  *
  * @param cs one chunk per column; the row count is taken from {@code cs[0]}
  */
 @Override
 public void map(Chunk[] cs) {
   final int rowCount = cs[0]._len;
   final int colCount = cs.length;
   for (int r = 0; r < rowCount; r++) {
     // Fresh buffer for each row
     final double[] rowValues = new double[colCount];
     // Load the row with the same NA and categorical handling that training uses
     data(rowValues, cs, r, _means, _mults, _modes);
     // Distance of this row from the (standardized) centroid held in _gc
     final double dist =
         hex.genmodel.GenModel.KMeans_distance(_gc, rowValues, _isCats, null, null);
     _tss += dist;
   }
 }
Beispiel #3
0
 /**
  * Finds which of the first {@code count} cluster centers is nearest to {@code point} and stores
  * both its index and its distance in {@code cd}.
  *
  * <p>A center only replaces the current best when its distance is strictly smaller, so on ties
  * the lowest index wins. If no center yields a distance below {@code Double.MAX_VALUE} (for
  * example when {@code count <= 0}), {@code cd._cluster} is {@code -1} and {@code cd._dist} is
  * {@code Double.MAX_VALUE}.
  *
  * @param centers candidate cluster centers; only indices {@code 0..count-1} are read
  * @param point the point to classify
  * @param isCats passed through unchanged to {@code GenModel.KMeans_distance}
  * @param cd result holder that is overwritten and returned
  * @param count number of leading entries of {@code centers} to consider
  * @return {@code cd}, to allow flow-coding
  */
 private static ClusterDist closest(
     double[][] centers, double[] point, String[][] isCats, ClusterDist cd, int count) {
   int bestIdx = -1;
   double bestSqr = Double.MAX_VALUE;
   int idx = 0;
   while (idx < count) {
     final double candidate =
         hex.genmodel.GenModel.KMeans_distance(centers[idx], point, isCats, null, null);
     // Strict comparison: the first of several equally near centers is kept
     if (candidate < bestSqr) {
       bestSqr = candidate;
       bestIdx = idx;
     }
     idx++;
   }
   cd._cluster = bestIdx; // Nearest cluster index
   cd._dist = bestSqr; // Its square-distance
   return cd;
 }