// Stopping criteria boolean isDone(KMeansModel model, double[][] newCenters, double[][] oldCenters) { if (!isRunning()) return true; // Stopped/cancelled // Stopped for running out iterations if (model._output._iterations >= _parms._max_iterations) return true; // Compute average change in standardized cluster centers if (oldCenters == null) return false; // No prior iteration, not stopping double average_change = 0; for (int clu = 0; clu < _parms._k; clu++) average_change += hex.genmodel.GenModel.KMeans_distance( oldCenters[clu], newCenters[clu], _isCats, null, null); average_change /= _parms._k; // Average change per cluster model._output._avg_centroids_chg = ArrayUtils.copyAndFillOf( model._output._avg_centroids_chg, model._output._avg_centroids_chg.length + 1, average_change); model._output._training_time_ms = ArrayUtils.copyAndFillOf( model._output._training_time_ms, model._output._training_time_ms.length + 1, System.currentTimeMillis()); return average_change < TOLERANCE; }
@Override public void map(Chunk[] cs) { for (int row = 0; row < cs[0]._len; row++) { double[] values = new double[cs.length]; // fetch the data - using consistent NA and categorical data handling (same as for training) data(values, cs, row, _means, _mults, _modes); // compute the distance from the (standardized) cluster centroids _tss += hex.genmodel.GenModel.KMeans_distance(_gc, values, _isCats, null, null); } }
/** Return both nearest of N cluster center/centroids, and the square-distance. */ private static ClusterDist closest( double[][] centers, double[] point, String[][] isCats, ClusterDist cd, int count) { int min = -1; double minSqr = Double.MAX_VALUE; for (int cluster = 0; cluster < count; cluster++) { double sqr = hex.genmodel.GenModel.KMeans_distance(centers[cluster], point, isCats, null, null); if (sqr < minSqr) { // Record nearest cluster min = cluster; minSqr = sqr; } } cd._cluster = min; // Record nearest cluster cd._dist = minSqr; // Record square-distance return cd; // Return for flow-coding }