// Stopping criteria boolean isDone(KMeansModel model, double[][] newCenters, double[][] oldCenters) { if (!isRunning()) return true; // Stopped/cancelled // Stopped for running out iterations if (model._output._iterations >= _parms._max_iterations) return true; // Compute average change in standardized cluster centers if (oldCenters == null) return false; // No prior iteration, not stopping double average_change = 0; for (int clu = 0; clu < _parms._k; clu++) average_change += hex.genmodel.GenModel.KMeans_distance( oldCenters[clu], newCenters[clu], _isCats, null, null); average_change /= _parms._k; // Average change per cluster model._output._avg_centroids_chg = ArrayUtils.copyAndFillOf( model._output._avg_centroids_chg, model._output._avg_centroids_chg.length + 1, average_change); model._output._training_time_ms = ArrayUtils.copyAndFillOf( model._output._training_time_ms, model._output._training_time_ms.length + 1, System.currentTimeMillis()); return average_change < TOLERANCE; }
// Compute all interesting KMeans stats (errors & variances of clusters, // etc). Return new centers. double[][] computeStatsFillModel( Lloyds task, KMeansModel model, final Vec[] vecs, final double[] means, final double[] mults, final int[] modes) { // Fill in the model based on original destandardized centers if (model._parms._standardize) { model._output._centers_std_raw = task._cMeans; } model._output._centers_raw = destandardize(task._cMeans, _isCats, means, mults); model._output._size = task._size; model._output._withinss = task._cSqr; double ssq = 0; // sum squared error for (int i = 0; i < _parms._k; i++) ssq += model._output._withinss[i]; // sum squared error all clusters model._output._tot_withinss = ssq; // Sum-of-square distance from grand mean if (_parms._k == 1) model._output._totss = model._output._tot_withinss; else { // If data already standardized, grand mean is just the origin TotSS totss = new TotSS(means, mults, modes, _parms.train().domains(), _parms.train().cardinality()) .doAll(vecs); model._output._totss = totss._tss; } model._output._betweenss = model._output._totss - model._output._tot_withinss; // MSE between-cluster model._output._iterations++; // add to scoring history model._output._history_withinss = ArrayUtils.copyAndFillOf( model._output._history_withinss, model._output._history_withinss.length + 1, model._output._tot_withinss); // Two small TwoDimTables - cheap model._output._model_summary = createModelSummaryTable(model._output); model._output._scoring_history = createScoringHistoryTable(model._output); // Take the cluster stats from the model, and assemble them into a model metrics object model._output._training_metrics = makeTrainingMetrics(model); return task._cMeans; // New centers }